mirror of
https://github.com/libretro/scummvm.git
synced 2025-01-11 04:06:12 +00:00
a97d8875f5
We now use a manual parser instead of a bison-generated one, and the new code to match said trees with parse trees matches sierra's more closely. Also change the parse/spec tree nodes to use direct pointers to their child nodes to make it more convenient to manipulate the trees. This has a high potential for regressions. svn-id: r51099
591 lines
16 KiB
C++
591 lines
16 KiB
C++
/* ScummVM - Graphic Adventure Engine
|
|
*
|
|
* ScummVM is the legal property of its developers, whose names
|
|
* are too numerous to list here. Please refer to the COPYRIGHT
|
|
* file distributed with this source distribution.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* $URL$
|
|
* $Id$
|
|
*
|
|
*/
|
|
|
|
/* Functionality to transform the context-free SCI grammar rules into
|
|
* strict Greibach normal form (strict GNF), and to test SCI input against
|
|
* that grammar, writing an appropriate node tree if successful.
|
|
*/
|
|
|
|
#include "sci/parser/vocabulary.h"
|
|
#include "sci/console.h"
|
|
#include "common/array.h"
|
|
|
|
namespace Sci {
|
|
|
|
#define TOKEN_OPAREN 0xff000000
|
|
#define TOKEN_CPAREN 0xfe000000
|
|
#define TOKEN_TERMINAL_CLASS 0x10000
|
|
#define TOKEN_TERMINAL_GROUP 0x20000
|
|
#define TOKEN_STUFFING_WORD 0x40000
|
|
#define TOKEN_NON_NT (TOKEN_OPAREN | TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP | TOKEN_STUFFING_WORD)
|
|
#define TOKEN_TERMINAL (TOKEN_TERMINAL_CLASS | TOKEN_TERMINAL_GROUP)
|
|
|
|
static int _allocd_rules = 0; // FIXME: Avoid non-const global vars
|
|
|
|
struct ParseRule {
|
|
int _id; /**< non-terminal ID */
|
|
uint _firstSpecial; /**< first terminal or non-terminal */
|
|
uint _numSpecials; /**< number of terminals and non-terminals */
|
|
Common::Array<int> _data; /**< actual data */
|
|
|
|
~ParseRule() {
|
|
assert(_allocd_rules > 0);
|
|
--_allocd_rules;
|
|
}
|
|
|
|
// FIXME remove this one again?
|
|
bool operator==(const ParseRule &other) const {
|
|
return _id == other._id &&
|
|
_firstSpecial == other._firstSpecial &&
|
|
_numSpecials == other._numSpecials &&
|
|
_data == other._data;
|
|
}
|
|
};
|
|
|
|
|
|
struct ParseRuleList {
|
|
int terminal; /**< Terminal character this rule matches against or 0 for a non-terminal rule */
|
|
ParseRule *rule;
|
|
ParseRuleList *next;
|
|
|
|
void print() const;
|
|
|
|
ParseRuleList(ParseRule *r) : rule(r), next(0) {
|
|
int term = rule->_data[rule->_firstSpecial];
|
|
terminal = ((term & TOKEN_TERMINAL) ? term : 0);
|
|
}
|
|
|
|
~ParseRuleList() {
|
|
delete rule;
|
|
delete next;
|
|
}
|
|
};
|
|
|
|
|
|
static void vocab_print_rule(ParseRule *rule) {
|
|
int wspace = 0;
|
|
|
|
if (!rule) {
|
|
warning("NULL rule");
|
|
return;
|
|
}
|
|
|
|
printf("[%03x] -> ", rule->_id);
|
|
|
|
if (rule->_data.empty())
|
|
printf("e");
|
|
|
|
for (uint i = 0; i < rule->_data.size(); i++) {
|
|
uint token = rule->_data[i];
|
|
|
|
if (token == TOKEN_OPAREN) {
|
|
if (i == rule->_firstSpecial)
|
|
printf("_");
|
|
|
|
printf("(");
|
|
wspace = 0;
|
|
} else if (token == TOKEN_CPAREN) {
|
|
if (i == rule->_firstSpecial)
|
|
printf("_");
|
|
|
|
printf(")");
|
|
wspace = 0;
|
|
} else {
|
|
if (wspace)
|
|
printf(" ");
|
|
|
|
if (i == rule->_firstSpecial)
|
|
printf("_");
|
|
if (token & TOKEN_TERMINAL_CLASS)
|
|
printf("C(%04x)", token & 0xffff);
|
|
else if (token & TOKEN_TERMINAL_GROUP)
|
|
printf("G(%04x)", token & 0xffff);
|
|
else if (token & TOKEN_STUFFING_WORD)
|
|
printf("%03x", token & 0xffff);
|
|
else
|
|
printf("[%03x]", token); /* non-terminal */
|
|
wspace = 1;
|
|
}
|
|
|
|
if (i == rule->_firstSpecial)
|
|
printf("_");
|
|
}
|
|
printf(" [%d specials]", rule->_numSpecials);
|
|
}
|
|
|
|
static ParseRule *_vdup(ParseRule *a) {
|
|
++_allocd_rules;
|
|
return new ParseRule(*a);
|
|
}
|
|
|
|
static ParseRule *_vinsert(ParseRule *turkey, ParseRule *stuffing) {
|
|
uint firstnt = turkey->_firstSpecial;
|
|
|
|
// Search for first TOKEN_NON_NT in 'turkey'
|
|
while ((firstnt < turkey->_data.size()) && (turkey->_data[firstnt] & TOKEN_NON_NT))
|
|
firstnt++;
|
|
|
|
// If no TOKEN_NON_NT found, or if it doesn't match the id of 'stuffing', abort.
|
|
if ((firstnt == turkey->_data.size()) || (turkey->_data[firstnt] != stuffing->_id))
|
|
return NULL;
|
|
|
|
// Create a new rule as a copy of 'turkey', where the token firstnt has been substituted
|
|
// by the rule 'stuffing'.
|
|
++_allocd_rules;
|
|
|
|
ParseRule *rule = new ParseRule(*turkey);
|
|
rule->_numSpecials += stuffing->_numSpecials - 1;
|
|
rule->_firstSpecial = firstnt + stuffing->_firstSpecial;
|
|
rule->_data.resize(turkey->_data.size() - 1 + stuffing->_data.size());
|
|
|
|
// Replace rule->_data[firstnt] by all of stuffing->_data
|
|
Common::copy(stuffing->_data.begin(), stuffing->_data.end(), rule->_data.begin() + firstnt);
|
|
|
|
if (firstnt < turkey->_data.size() - 1)
|
|
Common::copy(turkey->_data.begin() + firstnt + 1, turkey->_data.end(),
|
|
rule->_data.begin() + firstnt + stuffing->_data.size());
|
|
|
|
return rule;
|
|
}
|
|
|
|
static ParseRule *_vbuild_rule(const parse_tree_branch_t *branch) {
|
|
int tokens = 0, tokenpos = 0, i;
|
|
|
|
while (tokenpos < 10 && branch->data[tokenpos]) {
|
|
int type = branch->data[tokenpos];
|
|
tokenpos += 2;
|
|
|
|
if ((type == VOCAB_TREE_NODE_COMPARE_TYPE) || (type == VOCAB_TREE_NODE_COMPARE_GROUP) || (type == VOCAB_TREE_NODE_FORCE_STORAGE))
|
|
++tokens;
|
|
else if (type > VOCAB_TREE_NODE_LAST_WORD_STORAGE)
|
|
tokens += 5;
|
|
else
|
|
return NULL; // invalid
|
|
}
|
|
|
|
ParseRule *rule = new ParseRule();
|
|
|
|
++_allocd_rules;
|
|
rule->_id = branch->id;
|
|
rule->_numSpecials = tokenpos >> 1;
|
|
rule->_data.resize(tokens);
|
|
rule->_firstSpecial = 0;
|
|
|
|
tokens = 0;
|
|
for (i = 0; i < tokenpos; i += 2) {
|
|
int type = branch->data[i];
|
|
int value = branch->data[i + 1];
|
|
|
|
if (type == VOCAB_TREE_NODE_COMPARE_TYPE)
|
|
rule->_data[tokens++] = value | TOKEN_TERMINAL_CLASS;
|
|
else if (type == VOCAB_TREE_NODE_COMPARE_GROUP)
|
|
rule->_data[tokens++] = value | TOKEN_TERMINAL_GROUP;
|
|
else if (type == VOCAB_TREE_NODE_FORCE_STORAGE)
|
|
rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
|
|
else { // normal inductive rule
|
|
rule->_data[tokens++] = TOKEN_OPAREN;
|
|
rule->_data[tokens++] = type | TOKEN_STUFFING_WORD;
|
|
rule->_data[tokens++] = value | TOKEN_STUFFING_WORD;
|
|
|
|
if (i == 0)
|
|
rule->_firstSpecial = tokens;
|
|
|
|
rule->_data[tokens++] = value; // The non-terminal
|
|
rule->_data[tokens++] = TOKEN_CPAREN;
|
|
}
|
|
}
|
|
|
|
return rule;
|
|
}
|
|
|
|
static ParseRule *_vsatisfy_rule(ParseRule *rule, const ResultWord &input) {
|
|
int dep;
|
|
|
|
if (!rule->_numSpecials)
|
|
return NULL;
|
|
|
|
dep = rule->_data[rule->_firstSpecial];
|
|
|
|
if (((dep & TOKEN_TERMINAL_CLASS) && ((dep & 0xffff) & input._class)) ||
|
|
((dep & TOKEN_TERMINAL_GROUP) && ((dep & 0xffff) & input._group))) {
|
|
ParseRule *retval = new ParseRule(*rule);
|
|
++_allocd_rules;
|
|
retval->_data[rule->_firstSpecial] = TOKEN_STUFFING_WORD | input._group;
|
|
retval->_numSpecials--;
|
|
retval->_firstSpecial = 0;
|
|
|
|
if (retval->_numSpecials) { // find first special, if it exists
|
|
for (uint i = rule->_firstSpecial; i < retval->_data.size(); ++i) {
|
|
int tmp = retval->_data[i];
|
|
if (!(tmp & TOKEN_NON_NT) || (tmp & TOKEN_TERMINAL)) {
|
|
retval->_firstSpecial = i;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return retval;
|
|
} else
|
|
return NULL;
|
|
}
|
|
|
|
void Vocabulary::freeRuleList(ParseRuleList *list) {
|
|
delete list;
|
|
}
|
|
|
|
static ParseRuleList *_vocab_add_rule(ParseRuleList *list, ParseRule *rule) {
|
|
if (!rule)
|
|
return list;
|
|
if (!rule->_data.size()) {
|
|
// Special case for qfg2 demo
|
|
warning("no rule contents on _vocab_add_rule()");
|
|
return list;
|
|
}
|
|
|
|
ParseRuleList *new_elem = new ParseRuleList(rule);
|
|
|
|
if (list) {
|
|
const int term = new_elem->terminal;
|
|
/* if (term < list->terminal) {
|
|
new_elem->next = list;
|
|
return new_elem;
|
|
} else {*/
|
|
ParseRuleList *seeker = list;
|
|
|
|
while (seeker->next/* && seeker->next->terminal <= term*/) {
|
|
if (seeker->next->terminal == term) {
|
|
if (*(seeker->next->rule) == *rule) {
|
|
delete rule;
|
|
// FIXME: not sure about this change, fixes pq2 crashing when having opened the cabinet
|
|
// and typing "go to bains" - delete rule deletes part of new_elem
|
|
//delete new_elem;
|
|
return list; // No duplicate rules
|
|
}
|
|
}
|
|
seeker = seeker->next;
|
|
}
|
|
|
|
new_elem->next = seeker->next;
|
|
seeker->next = new_elem;
|
|
return list;
|
|
} else {
|
|
return new_elem;
|
|
}
|
|
}
|
|
|
|
void ParseRuleList::print() const {
|
|
const ParseRuleList *list = this;
|
|
int pos = 0;
|
|
while (list) {
|
|
printf("R%03d: ", pos);
|
|
vocab_print_rule(list->rule);
|
|
printf("\n");
|
|
list = list->next;
|
|
pos++;
|
|
}
|
|
printf("%d rules total.\n", pos);
|
|
}
|
|
|
|
static ParseRuleList *_vocab_split_rule_list(ParseRuleList *list) {
|
|
assert(list);
|
|
if (!list->next || (list->next->terminal)) {
|
|
ParseRuleList *tmp = list->next;
|
|
list->next = NULL;
|
|
return tmp;
|
|
} else
|
|
return _vocab_split_rule_list(list->next);
|
|
}
|
|
|
|
static void _vocab_free_empty_rule_list(ParseRuleList *list) {
|
|
assert(list);
|
|
if (list->next)
|
|
_vocab_free_empty_rule_list(list->next);
|
|
list->next = 0;
|
|
list->rule = 0;
|
|
delete list;
|
|
}
|
|
|
|
static ParseRuleList *_vocab_merge_rule_lists(ParseRuleList *l1, ParseRuleList *l2) {
|
|
ParseRuleList *retval = l1, *seeker = l2;
|
|
while (seeker) {
|
|
retval = _vocab_add_rule(retval, seeker->rule);
|
|
seeker = seeker->next;
|
|
}
|
|
_vocab_free_empty_rule_list(l2);
|
|
|
|
return retval;
|
|
}
|
|
|
|
static int _vocab_rule_list_length(ParseRuleList *list) {
|
|
return ((list) ? _vocab_rule_list_length(list->next) + 1 : 0);
|
|
}
|
|
|
|
static ParseRuleList *_vocab_clone_rule_list_by_id(ParseRuleList *list, int id) {
|
|
ParseRuleList *result = NULL;
|
|
ParseRuleList *seeker = list;
|
|
|
|
while (seeker) {
|
|
if (seeker->rule->_id == id) {
|
|
result = _vocab_add_rule(result, _vdup(seeker->rule));
|
|
}
|
|
seeker = seeker->next;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
ParseRuleList *Vocabulary::buildGNF(bool verbose) {
|
|
int iterations = 0;
|
|
int last_termrules, termrules = 0;
|
|
int ntrules_nr;
|
|
ParseRuleList *ntlist = NULL;
|
|
ParseRuleList *tlist, *new_tlist;
|
|
Console *con = g_sci->getSciDebugger();
|
|
|
|
for (uint i = 1; i < _parserBranches.size(); i++) { // branch rule 0 is treated specially
|
|
ParseRule *rule = _vbuild_rule(&_parserBranches[i]);
|
|
if (!rule)
|
|
return NULL;
|
|
ntlist = _vocab_add_rule(ntlist, rule);
|
|
}
|
|
|
|
tlist = _vocab_split_rule_list(ntlist);
|
|
ntrules_nr = _vocab_rule_list_length(ntlist);
|
|
|
|
if (verbose)
|
|
con->DebugPrintf("Starting with %d rules\n", ntrules_nr);
|
|
|
|
new_tlist = tlist;
|
|
tlist = NULL;
|
|
|
|
do {
|
|
ParseRuleList *new_new_tlist = NULL;
|
|
ParseRuleList *ntseeker, *tseeker;
|
|
last_termrules = termrules;
|
|
|
|
ntseeker = ntlist;
|
|
while (ntseeker) {
|
|
tseeker = new_tlist;
|
|
|
|
while (tseeker) {
|
|
ParseRule *newrule = _vinsert(ntseeker->rule, tseeker->rule);
|
|
if (newrule)
|
|
new_new_tlist = _vocab_add_rule(new_new_tlist, newrule);
|
|
tseeker = tseeker->next;
|
|
}
|
|
|
|
ntseeker = ntseeker->next;
|
|
}
|
|
|
|
tlist = _vocab_merge_rule_lists(tlist, new_tlist);
|
|
new_tlist = new_new_tlist;
|
|
termrules = _vocab_rule_list_length(new_new_tlist);
|
|
|
|
if (verbose)
|
|
con->DebugPrintf("After iteration #%d: %d new term rules\n", ++iterations, termrules);
|
|
|
|
} while (termrules && (iterations < 30));
|
|
|
|
freeRuleList(ntlist);
|
|
|
|
if (verbose) {
|
|
con->DebugPrintf("\nGNF rules:\n");
|
|
tlist->print();
|
|
con->DebugPrintf("%d allocd rules\n", _allocd_rules);
|
|
con->DebugPrintf("Freeing rule list...\n");
|
|
freeRuleList(tlist);
|
|
return NULL;
|
|
}
|
|
|
|
return tlist;
|
|
}
|
|
|
|
static int _vbpt_pareno(ParseTreeNode *nodes, int *pos, int base) {
|
|
// Opens parentheses
|
|
nodes[base].left = &nodes[(*pos) + 1];
|
|
nodes[++(*pos)].type = kParseTreeBranchNode;
|
|
nodes[*pos].left = 0;
|
|
nodes[*pos].right = 0;
|
|
return *pos;
|
|
}
|
|
|
|
static int _vbpt_parenc(ParseTreeNode *nodes, int *pos, int paren) {
|
|
// Closes parentheses for appending
|
|
nodes[paren].right = &nodes[++(*pos)];
|
|
nodes[*pos].type = kParseTreeBranchNode;
|
|
nodes[*pos].left = 0;
|
|
nodes[*pos].right = 0;
|
|
return *pos;
|
|
}
|
|
|
|
static int _vbpt_append(ParseTreeNode *nodes, int *pos, int base, int value) {
|
|
// writes one value to an existing base node and creates a successor node for writing
|
|
nodes[base].left = &nodes[++(*pos)];
|
|
nodes[*pos].type = kParseTreeLeafNode;
|
|
nodes[*pos].value = value;
|
|
nodes[base].right = &nodes[++(*pos)];
|
|
nodes[*pos].type = kParseTreeBranchNode;
|
|
nodes[*pos].left = 0;
|
|
nodes[*pos].right = 0;
|
|
return *pos;
|
|
}
|
|
|
|
static int _vbpt_terminate(ParseTreeNode *nodes, int *pos, int base, int value) {
|
|
// Terminates, overwriting a nextwrite forknode
|
|
nodes[base].type = kParseTreeLeafNode;
|
|
nodes[base].value = value;
|
|
return *pos;
|
|
}
|
|
|
|
static int _vbpt_write_subexpression(ParseTreeNode *nodes, int *pos, ParseRule *rule, uint rulepos, int writepos) {
|
|
uint token;
|
|
|
|
while ((token = ((rulepos < rule->_data.size()) ? rule->_data[rulepos++] : TOKEN_CPAREN)) != TOKEN_CPAREN) {
|
|
uint nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
|
|
if (token == TOKEN_OPAREN) {
|
|
int writepos2 = _vbpt_pareno(nodes, pos, writepos);
|
|
rulepos = _vbpt_write_subexpression(nodes, pos, rule, rulepos, writepos2);
|
|
nexttoken = (rulepos < rule->_data.size()) ? rule->_data[rulepos] : TOKEN_CPAREN;
|
|
if (nexttoken != TOKEN_CPAREN)
|
|
writepos = _vbpt_parenc(nodes, pos, writepos);
|
|
} else if (token & TOKEN_STUFFING_WORD) {
|
|
if (nexttoken == TOKEN_CPAREN)
|
|
writepos = _vbpt_terminate(nodes, pos, writepos, token & 0xffff);
|
|
else
|
|
writepos = _vbpt_append(nodes, pos, writepos, token & 0xffff);
|
|
} else {
|
|
printf("\nError in parser (grammar.cpp, _vbpt_write_subexpression()): Rule data broken in rule ");
|
|
vocab_print_rule(rule);
|
|
printf(", at token position %d\n", *pos);
|
|
return rulepos;
|
|
}
|
|
}
|
|
|
|
return rulepos;
|
|
}
|
|
|
|
int Vocabulary::parseGNF(const ResultWordList &words, bool verbose) {
|
|
Console *con = g_sci->getSciDebugger();
|
|
// Get the start rules:
|
|
ParseRuleList *work = _vocab_clone_rule_list_by_id(_parserRules, _parserBranches[0].data[1]);
|
|
ParseRuleList *results = NULL;
|
|
uint word = 0;
|
|
const uint words_nr = words.size();
|
|
ResultWordList::const_iterator word_iter = words.begin();
|
|
|
|
for (word_iter = words.begin(); word_iter != words.end(); ++word_iter, ++word) {
|
|
ParseRuleList *new_work = NULL;
|
|
ParseRuleList *reduced_rules = NULL;
|
|
ParseRuleList *seeker, *subseeker;
|
|
|
|
if (verbose)
|
|
con->DebugPrintf("Adding word %d...\n", word);
|
|
|
|
seeker = work;
|
|
while (seeker) {
|
|
if (seeker->rule->_numSpecials <= (words_nr - word))
|
|
reduced_rules = _vocab_add_rule(reduced_rules, _vsatisfy_rule(seeker->rule, *word_iter));
|
|
|
|
seeker = seeker->next;
|
|
}
|
|
|
|
if (reduced_rules == NULL) {
|
|
freeRuleList(work);
|
|
if (verbose)
|
|
con->DebugPrintf("No results.\n");
|
|
return 1;
|
|
}
|
|
|
|
freeRuleList(work);
|
|
|
|
if (word + 1 < words_nr) {
|
|
seeker = reduced_rules;
|
|
|
|
while (seeker) {
|
|
if (seeker->rule->_numSpecials) {
|
|
int my_id = seeker->rule->_data[seeker->rule->_firstSpecial];
|
|
|
|
subseeker = _parserRules;
|
|
while (subseeker) {
|
|
if (subseeker->rule->_id == my_id)
|
|
new_work = _vocab_add_rule(new_work, _vinsert(seeker->rule, subseeker->rule));
|
|
|
|
subseeker = subseeker->next;
|
|
}
|
|
}
|
|
|
|
seeker = seeker->next;
|
|
}
|
|
freeRuleList(reduced_rules);
|
|
} else // last word
|
|
new_work = reduced_rules;
|
|
|
|
work = new_work;
|
|
if (verbose)
|
|
con->DebugPrintf("Now at %d candidates\n", _vocab_rule_list_length(work));
|
|
if (work == NULL) {
|
|
if (verbose)
|
|
con->DebugPrintf("No results.\n");
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
results = work;
|
|
|
|
if (verbose) {
|
|
con->DebugPrintf("All results (excluding the surrounding '(141 %03x' and ')'):\n", _parserBranches[0].id);
|
|
results->print();
|
|
con->DebugPrintf("\n");
|
|
}
|
|
|
|
// now use the first result
|
|
{
|
|
int temp, pos;
|
|
|
|
_parserNodes[0].type = kParseTreeBranchNode;
|
|
_parserNodes[0].left = &_parserNodes[1];
|
|
_parserNodes[0].right = &_parserNodes[2];
|
|
|
|
_parserNodes[1].type = kParseTreeLeafNode;
|
|
_parserNodes[1].value = 0x141;
|
|
|
|
_parserNodes[2].type = kParseTreeBranchNode;
|
|
_parserNodes[2].left = 0;
|
|
_parserNodes[2].right = 0;
|
|
|
|
pos = 2;
|
|
|
|
temp = _vbpt_append(_parserNodes, &pos, 2, _parserBranches[0].id);
|
|
//_vbpt_write_subexpression(nodes, &pos, results[_vocab_rule_list_length(results)].rule, 0, temp);
|
|
_vbpt_write_subexpression(_parserNodes, &pos, results->rule, 0, temp);
|
|
}
|
|
|
|
freeRuleList(results);
|
|
|
|
return 0;
|
|
}
|
|
|
|
} // End of namespace Sci
|