mirror of
https://github.com/libretro/scummvm.git
synced 2025-01-08 19:00:57 +00:00
eb0710c2cd
svn-id: r40101
567 lines
15 KiB
C++
567 lines
15 KiB
C++
/* ScummVM - Graphic Adventure Engine
|
|
*
|
|
* ScummVM is the legal property of its developers, whose names
|
|
* are too numerous to list here. Please refer to the COPYRIGHT
|
|
* file distributed with this source distribution.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*
|
|
* $URL$
|
|
* $Id$
|
|
*
|
|
*/
|
|
|
|
// Main vocabulary support functions and word lookup
|
|
|
|
#include "sci/scicore/vocabulary.h"
|
|
#include "sci/scicore/resource.h"
|
|
#include "sci/engine/state.h"
|
|
#include "sci/engine/kernel.h"
|
|
|
|
namespace Sci {
|
|
|
|
// Version of the main vocabulary resource selected by vocab_get_words():
// 0 when the SCI0 resource was used, 1 when the code fell back to the
// SCI01/SCI1 resource.
int vocab_version;

// Version-dependent resource numbers. The expansions are wrapped in
// parentheses so that the conditional expression stays intact when the macro
// is used inside a larger expression (e.g. "MACRO + 1" or "MACRO == x").
#define VOCAB_RESOURCE_PARSE_TREE_BRANCHES (vocab_version == 1 ? \
		VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES : \
		VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES)

#define VOCAB_RESOURCE_SUFFIX_VOCAB (vocab_version == 1 ? \
		VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB : \
		VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB)
|
|
|
|
|
|
/*
|
|
// These strange names were taken from an SCI01 interpreter
|
|
const char *class_names[] = {"",
|
|
"",
|
|
"conj", // conjunction
|
|
"ass", // ?
|
|
"pos", // preposition ?
|
|
"art", // article
|
|
"adj", // adjective
|
|
"pron", // pronoun
|
|
"noun", // noun
|
|
"auxv", // auxillary verb
|
|
"adv", // adverb
|
|
"verb", // verb
|
|
"",
|
|
"",
|
|
"",
|
|
""
|
|
};
|
|
*/
|
|
|
|
/**
 * Loads the main vocabulary resource and fills 'words' with its entries.
 *
 * The SCI0 resource is tried first; if it is missing the SCI01/SCI1 resource
 * is used instead. As a side effect the global vocab_version is set to 0 or 1
 * accordingly.
 *
 * All reads from the resource are bounds-checked: a word that is not
 * terminated inside the resource, or an entry whose three class/group bytes
 * are missing, is treated as a truncated resource instead of being read past
 * the end of the data.
 *
 * @param resmgr	resource manager used to look up the vocabulary resource
 * @param words		receives the decoded words; cleared before decoding starts
 * @return true if a vocabulary was decoded, false if none was found or it was
 *         unusable
 */
bool vocab_get_words(ResourceManager *resmgr, WordMap &words) {
	char currentword[256] = "";	// They're not going to use words longer than 255 ;-)
	int currentwordpos = 0;

	// First try to load the SCI0 vocab resource.
	Resource *resource = resmgr->findResource(kResourceTypeVocab, VOCAB_RESOURCE_SCI0_MAIN_VOCAB, 0);
	vocab_version = 0;

	if (!resource) {
		warning("SCI0: Could not find a main vocabulary, trying SCI01");
		resource = resmgr->findResource(kResourceTypeVocab, VOCAB_RESOURCE_SCI1_MAIN_VOCAB, 0);
		vocab_version = 1;
	}

	if (!resource) {
		warning("SCI1: Could not find a main vocabulary");
		return false; // NOT critical: SCI1 games and some demos don't have one!
	}

	unsigned int seeker;
	if (vocab_version == 1)
		seeker = 255 * 2; // vocab.900 starts with 255 16-bit pointers which we don't use
	else
		seeker = 26 * 2; // vocab.000 starts with 26 16-bit pointers which we don't use

	if (resource->size < seeker) {
		fprintf(stderr, "Invalid main vocabulary encountered: Too small\n");
		return false;
		// Now this ought to be critical, but it'll just cause parse() and said() not to work
	}

	words.clear();

	while (seeker < resource->size) {
		byte c;

		// Number of characters kept from the previous word
		currentwordpos = resource->data[seeker++]; // Parts of previous words may be re-used

		if (vocab_version == 1) {
			// SCI1: the remainder of the word is a NUL-terminated string
			c = 1;
			while (seeker < resource->size && currentwordpos < 255 && c) {
				c = resource->data[seeker++];
				currentword[currentwordpos++] = c;
			}
		} else {
			// SCI0: the last character of the word has bit 7 set. The loop is
			// bounded by both the resource size and the word buffer.
			c = 0;
			while (seeker < resource->size && currentwordpos < 255 && c < 0x80) {
				c = resource->data[seeker++];
				currentword[currentwordpos++] = c & 0x7f; // 0x80 is used to terminate the string
			}

			if (c < 0x80) {
				// Ran out of data or buffer space before the terminator
				warning("SCI0: Main vocabulary contains an unterminated word, ignoring the rest");
				break;
			}
		}

		currentword[currentwordpos] = 0;

		// The class and group of the word occupy the next three bytes
		if (seeker + 3 > resource->size) {
			if (vocab_version == 1) {
				warning("SCI1: Vocabulary not usable, disabling");
				words.clear();
				return false;
			}

			// Keep the words decoded so far and drop the incomplete entry
			warning("SCI0: Main vocabulary is truncated, ignoring incomplete last entry");
			break;
		}

		// Now decode class and group:
		c = resource->data[seeker + 1];
		ResultWord newWord;
		newWord._class = ((resource->data[seeker]) << 4) | ((c & 0xf0) >> 4);
		newWord._group = (resource->data[seeker + 2]) | ((c & 0x0f) << 8);

		// Add the word to the list
		words[currentword] = newWord;

		seeker += 3;
	}

	return true;
}
|
|
|
|
const char *vocab_get_any_group_word(int group, const WordMap &words) {
|
|
if (group == VOCAB_MAGIC_NUMBER_GROUP)
|
|
return "{number}";
|
|
|
|
for (WordMap::const_iterator i = words.begin(); i != words.end(); ++i)
|
|
if (i->_value._group == group)
|
|
return i->_key.c_str();
|
|
|
|
return "{invalid}";
|
|
}
|
|
|
|
bool vocab_get_suffixes(ResourceManager *resmgr, SuffixList &suffixes) {
|
|
Resource *resource = resmgr->findResource(kResourceTypeVocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 1);
|
|
unsigned int seeker = 1;
|
|
|
|
if (!resource) {
|
|
warning("Could not find suffix vocabulary");
|
|
return false; // Not critical
|
|
}
|
|
|
|
while ((seeker < resource->size - 1) && (resource->data[seeker + 1] != 0xff)) {
|
|
suffix_t suffix;
|
|
|
|
suffix.alt_suffix = (const char *)resource->data + seeker;
|
|
suffix.alt_suffix_length = strlen(suffix.alt_suffix);
|
|
seeker += suffix.alt_suffix_length + 1; // Hit end of string
|
|
|
|
suffix.class_mask = (int16)READ_BE_UINT16(resource->data + seeker);
|
|
seeker += 2;
|
|
|
|
// Beginning of next string - skip leading '*'
|
|
seeker++;
|
|
|
|
suffix.word_suffix = (const char *)resource->data + seeker;
|
|
suffix.word_suffix_length = strlen(suffix.word_suffix);
|
|
seeker += suffix.word_suffix_length + 1;
|
|
|
|
suffix.result_class = (int16)READ_BE_UINT16(resource->data + seeker);
|
|
seeker += 3; // Next entry
|
|
|
|
suffixes.push_back(suffix);
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/**
 * Unlocks the suffix vocabulary resource and empties the suffix list.
 *
 * @param resmgr	resource manager that holds the suffix resource
 * @param suffixes	list to clear
 */
void vocab_free_suffixes(ResourceManager *resmgr, SuffixList &suffixes) {
	// Look the resource up again (lock argument 0) so it can be handed to unlockResource()
	Resource *suffixResource = resmgr->findResource(kResourceTypeVocab, VOCAB_RESOURCE_SUFFIX_VOCAB, 0);

	resmgr->unlockResource(suffixResource, VOCAB_RESOURCE_SUFFIX_VOCAB, kResourceTypeVocab);

	suffixes.clear();
}
|
|
|
|
/**
 * Decodes the parse tree branch resource into 'branches'.
 *
 * The resource is read as a sequence of 20 byte records: a little endian
 * 16-bit id followed by nine little endian 16-bit data words. A tenth data
 * word is always set to 0. If the last record has an id of 0 it is removed
 * again.
 *
 * @param resmgr	resource manager used to look up the resource
 * @param branches	receives the decoded branches; always cleared first
 * @return false if the resource is missing or holds no complete record,
 *         true otherwise
 */
bool vocab_get_branches(ResourceManager * resmgr, Common::Array<parse_tree_branch_t> &branches) {
	Resource *treeResource = resmgr->findResource(kResourceTypeVocab, VOCAB_RESOURCE_PARSE_TREE_BRANCHES, 0);

	branches.clear();

	if (!treeResource) {
		fprintf(stderr, "No parser tree data found!\n");
		return false;
	}

	int recordCount = treeResource->size / 20;
	if (recordCount == 0) {
		fprintf(stderr, "Parser tree data is empty!\n");
		return false;
	}

	branches.resize(recordCount);

	byte *record = treeResource->data;
	for (int n = 0; n < recordCount; n++, record += 20) {
		branches[n].id = (int16)READ_LE_UINT16(record);

		byte *dataWords = record + 2;
		for (int w = 0; w < 9; w++)
			branches[n].data[w] = READ_LE_UINT16(dataWords + 2 * w);

		branches[n].data[9] = 0; // Always terminate
	}

	// Branch lists may be terminated by empty rules
	int lastRecord = recordCount - 1;
	if (branches[lastRecord].id == 0)
		branches.remove_at(lastRecord);

	return true;
}
|
|
|
|
|
|
/**
 * Looks up a single word in the vocabulary.
 *
 * The lookup is attempted in three steps:
 *  1. the word with all dashes removed is searched in 'words';
 *  2. for each suffix whose alternative form matches the end of the word
 *     (case-insensitively), the alternative form is replaced by the suffix's
 *     word form and the result is searched; a hit only counts if the class of
 *     the dictionary word shares a bit with the suffix's class mask, and the
 *     returned class is then the suffix's result class;
 *  3. the dash-free word is tested for being a complete non-negative decimal
 *     number.
 *
 * @param word		start of the word (does not need to be NUL-terminated)
 * @param word_len	number of characters of 'word' to consider
 * @param words		vocabulary to search
 * @param suffixes	suffix rules to try
 * @return the matching word, the number class/group for numbers, or a result
 *         with class and group set to -1 if nothing matched
 */
ResultWord vocab_lookup_word(const char *word, int word_len, const WordMap &words, const SuffixList &suffixes) {
	Common::String tempword(word, word_len);

	// Remove all dashes from tempword
	for (uint i = 0; i < tempword.size(); ) {
		if (tempword[i] == '-')
			tempword.deleteChar(i);
		else
			++i;
	}

	// Look it up ('words' is const, so a const_iterator is the matching type):
	WordMap::const_iterator dict_word = words.find(tempword);

	// Match found? Return it!
	if (dict_word != words.end()) {
		return dict_word->_value;
	}

	// Now try all suffixes
	for (SuffixList::const_iterator suffix = suffixes.begin(); suffix != suffixes.end(); ++suffix) {
		if (suffix->alt_suffix_length <= word_len) {
			// Offset of the start of the suffix
			int suff_index = word_len - suffix->alt_suffix_length;

			if (scumm_strnicmp(suffix->alt_suffix, word + suff_index, suffix->alt_suffix_length) == 0) { // Suffix matched!
				// Terminate word at suffix start position...:
				Common::String tempword2(word, MIN(word_len, suff_index));

				// ...and append "correct" suffix
				tempword2 += Common::String(suffix->word_suffix, suffix->word_suffix_length);

				dict_word = words.find(tempword2);

				if ((dict_word != words.end()) && (dict_word->_value._class & suffix->class_mask)) { // Found it?
					// Use suffix class
					ResultWord tmp = dict_word->_value;
					tmp._class = suffix->result_class;
					return tmp;
				}
			}
		}
	}

	// No match so far? Check if it's a number.

	ResultWord retval = { -1, -1 };
	const char *numberText = tempword.c_str();
	char *tester;

	// "tester != numberText" rejects input without any digits: strtol()
	// returns 0 for an empty string and leaves the end pointer at the start,
	// which would otherwise be accepted as a number.
	if ((strtol(numberText, &tester, 10) >= 0) && (tester != numberText) && (*tester == '\0')) { // Do we have a complete number here?
		ResultWord tmp = { VOCAB_CLASS_NUMBER, VOCAB_MAGIC_NUMBER_GROUP };
		retval = tmp;
	}

	return retval;
}
|
|
|
|
/**
 * Prints a said block in readable form using sciprintf().
 *
 * Bytes below 0xf0 start a two byte word group, which is printed as an
 * example word of that group followed by the group number. Bytes 0xf0 to
 * 0xf9 are printed as operator characters, 0xff ends the block and the
 * remaining values print nothing.
 *
 * @param s		engine state providing the parser vocabulary
 * @param addr	start of the said block
 */
void vocab_decypher_said_block(EngineState *s, byte *addr) {
	bool endOfBlock = false;

	while (!endOfBlock) {
		int token = *addr++;

		if (token < 0xf0) {
			// Word group: combine with the following byte. A combined value
			// of 0xff must not be taken for the terminator, so the loop
			// simply continues here.
			token = token << 8 | *addr++;
			sciprintf(" %s[%03x]", vocab_get_any_group_word(token, s->_parserWords), token);
			continue;
		}

		switch (token) {
		case 0xf0:
			sciprintf(" ,");
			break;
		case 0xf1:
			sciprintf(" &");
			break;
		case 0xf2:
			sciprintf(" /");
			break;
		case 0xf3:
			sciprintf(" (");
			break;
		case 0xf4:
			sciprintf(" )");
			break;
		case 0xf5:
			sciprintf(" [");
			break;
		case 0xf6:
			sciprintf(" ]");
			break;
		case 0xf7:
			sciprintf(" #");
			break;
		case 0xf8:
			sciprintf(" <");
			break;
		case 0xf9:
			sciprintf(" >");
			break;
		case 0xff:
			endOfBlock = true;
			break;
		default:
			break;
		}
	}

	sciprintf("\n");
}
|
|
|
|
|
|
#ifdef SCI_SIMPLE_SAID_CODE
|
|
|
|
// Search masks used by vocab_build_simple_parse_tree(), one row per word type.
// Column 0 is the mask for the backwards search, column 1 the mask for the
// forward search; a mask of 0 disables the search in that direction.
static const short _related_words[][2] = {
	//  backwards  forward
	{ 0x0800, 0x0180 },	// preposition
	{ 0x0000, 0x0180 },	// article
	{ 0x0000, 0x0180 },	// adjective
	{ 0x0800, 0x0000 },	// pronoun
	{ 0x0800, 0x0180 },	// noun
	{ 0x0000, 0x0800 },	// auxiliary verb
	{ 0x0800, 0x0800 },	// adverb
	{ 0x0000, 0x0180 },	// verb
	{ 0x0000, 0x0180 }	// number
};
|
|
|
|
/**
 * Builds a simplified parse tree from a list of words.
 *
 * Every word whose class is not VOCAB_CLASS_ANYWORD occupies two consecutive
 * nodes: the first holds the word's class and group, the second holds two
 * links (content.branches[0] and [1]) to the nearest related word found when
 * searching backwards and forwards, or -1 if there is none. The node list is
 * terminated by a node of type -1.
 *
 * Only compiled when SCI_SIMPLE_SAID_CODE is defined.
 *
 * NOTE(review): 'words' is indexed with an integer here, while the rest of
 * this file accesses WordMap through find() and iterators with _key/_value.
 * This looks like it predates the WordMap type - confirm before enabling
 * SCI_SIMPLE_SAID_CODE.
 *
 * @param nodes	node array to fill
 * @param words	words to build the tree from
 * @return always 0
 */
int vocab_build_simple_parse_tree(parse_tree_node_t *nodes, WordMap &words) {
	int i, length, pos = 0;

	for (i = 0; i < words.size(); ++i) {
		if (words[i]._class != VOCAB_CLASS_ANYWORD) {
			nodes[pos].type = words[i]._class;
			nodes[pos].content.value = words[i]._group;
			pos += 2; // Link information is filled in below
		}
	}
	nodes[pos].type = -1; // terminate
	length = pos >> 1;

	// now find all referenced words
#ifdef SCI_SIMPLE_SAID_DEBUG
	sciprintf("Semantic references:\n");
#endif

	for (i = 0; i < length; i++) {
		int j;
		int searchmask;
		int type;

		pos = (i << 1);
		type = sci_ffs(nodes[pos].type);

		if (type) {
			int found = -1;

			type -= 5; // 1 because ffs starts counting at 1, 4 because nodes[pos].type is a nibble off
			if (type < 0)
				type = 0;
#ifdef SCI_SIMPLE_SAID_DEBUG
			sciprintf("#%d: Word %04x: type %04x\n", i, nodes[pos].content.value, type);
#endif

			// search backwards
			searchmask = _related_words[type][0];
			if (searchmask) {
				for (j = i - 1; j >= 0; j--)
					if (nodes[j << 1].type & searchmask) {
						found = j << 1;
						break;
					}
			}
			nodes[pos + 1].content.branches[0] = found;
#ifdef SCI_SIMPLE_SAID_DEBUG
			if (found > -1)
				sciprintf(" %d <\n", found >> 1);
#endif

			// search forward
			found = -1;
			searchmask = _related_words[type][1];
			if (searchmask) {
				for (j = i + 1; j < length; j++)
					if (nodes[j << 1].type & searchmask) {
						found = j << 1;
						break;
					}
			}
			// Store the forward link as well; without this the second branch
			// of a typed word was never written.
			nodes[pos + 1].content.branches[1] = found;
#ifdef SCI_SIMPLE_SAID_DEBUG
			if (found > -1)
				sciprintf(" > %d\n", found >> 1);
#endif
		} else {
#ifdef SCI_SIMPLE_SAID_DEBUG
			sciprintf("#%d: Untypified word\n", i); /* Weird, but not fatal */
#endif
			nodes[pos + 1].content.branches[0] = -1;
			nodes[pos + 1].content.branches[1] = -1;
		}
	}
#ifdef SCI_SIMPLE_SAID_DEBUG
	sciprintf("/Semantic references.\n");
#endif

	return 0;
}
|
|
#endif
|
|
|
|
/**
 * Splits a sentence into words and looks each of them up in the vocabulary.
 *
 * A word is a run of alphanumeric characters; it may contain dashes but may
 * not start with one. Every other character separates words.
 *
 * @param retval	list the recognized words are appended to; cleared if an
 *					unknown word is encountered
 * @param sentence	NUL-terminated input string
 * @param words		vocabulary used for the lookup
 * @param suffixes	suffix rules used for the lookup
 * @param error		set to NULL on entry; on failure it receives a buffer
 *					allocated with sci_calloc() that holds a copy of the word
 *					that could not be found
 * @return true if every word was recognized, false otherwise
 */
bool vocab_tokenize_string(ResultWordList &retval, const char *sentence, const WordMap &words,
	const SuffixList &suffixes, char **error) {
	const char *lastword = sentence;
	int pos_in_sentence = 0;
	char c;
	int wordlen = 0;

	*error = NULL;

	do {
		c = sentence[pos_in_sentence++];

		// isalnum() is only defined for values representable as unsigned char
		// (and EOF), so plain char must be converted before the call.
		if (isalnum((unsigned char)c) || (c == '-' && wordlen)) {
			// Continue on this word.
			// Words may contain a '-', but may not start with one.
			++wordlen;
		} else {
			if (wordlen) { // Finished a word?
				// Look it up
				ResultWord lookup_result =
					vocab_lookup_word(lastword, wordlen, words, suffixes);

				if (lookup_result._class == -1) { // Not found?
					// The buffer is zero-filled and one byte longer than the
					// word, so the copy is always terminated.
					*error = (char *)sci_calloc(wordlen + 1, 1);
					strncpy(*error, lastword, wordlen); // Set the offending word
					retval.clear();
					return false; // And return with error
				}

				// Copy into list
				retval.push_back(lookup_result);
			}

			// The next word can start after this separator at the earliest
			lastword = sentence + pos_in_sentence;
			wordlen = 0;
		}

	} while (c); // Until terminator is hit

	return true;
}
|
|
|
|
/**
 * Prints the parse tree below node 'nr' in a compact "<left,right>" notation.
 *
 * Leaves are printed as their hexadecimal value, branch nodes as
 * "<left,right>" where a branch index of 0 is printed as "NULL".
 *
 * @param nodes		node array of the tree
 * @param nr		index of the node to print
 * @param prevnr	index of the node this one was reached from (only
 *					referenced by a disabled check)
 */
void _vocab_recursive_ptree_dump_treelike(parse_tree_node_t *nodes, int nr, int prevnr) {
	if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) {
		sciprintf("Error(%04x)", nr);
		return;
	}

	if (nodes[nr].type == PARSE_TREE_NODE_LEAF) {
		//sciprintf("[%03x]%04x", nr, nodes[nr].content.value);
		sciprintf("%x", nodes[nr].content.value);
		return;
	}

	// Branch node: fetch both children, then print them left to right
	int children[2];
	children[0] = nodes[nr].content.branches[0];
	children[1] = nodes[nr].content.branches[1];

	//sciprintf("<[%03x]", nr);
	sciprintf("<");

	for (int side = 0; side < 2; side++) {
		if (side == 1)
			sciprintf(",");

		if (children[side])
			_vocab_recursive_ptree_dump_treelike(nodes, children[side], nr);
		else
			sciprintf("NULL");
	}

	sciprintf(">");
}
|
|
|
|
/**
 * Prints the parse tree below branch node 'nr' as a nested list.
 *
 * The left branch is printed first: a branch node is printed on a new,
 * indented line inside parentheses, anything else as its hexadecimal value.
 * The right branch follows on the same nesting level. Branch indices of 0 are
 * skipped.
 *
 * @param nodes		node array of the tree
 * @param nr		index of the branch node to print
 * @param prevnr	index of the node this one was reached from (only
 *					referenced by a disabled check)
 * @param blanks	number of indentation units for nested lists
 */
void _vocab_recursive_ptree_dump(parse_tree_node_t *nodes, int nr, int prevnr, int blanks) {
	int i;

	// Validate the index before nodes[nr] is touched; previously the node was
	// read first and only checked afterwards.
	// NOTE(review): if the node array holds exactly VOCAB_TREE_NODES entries
	// this comparison should probably be ">=" - confirm against the array
	// declaration.
	if ((nr > VOCAB_TREE_NODES)/* || (nr < prevnr)*/) {
		sciprintf("Error(%04x))", nr);
		return;
	}

	if (nodes[nr].type == PARSE_TREE_NODE_LEAF) {
		sciprintf("vocab_dump_parse_tree: Error: consp is nil for element %03x\n", nr);
		return;
	}

	int lbranch = nodes[nr].content.branches[0];
	int rbranch = nodes[nr].content.branches[1];

	if (lbranch) {
		if (nodes[lbranch].type == PARSE_TREE_NODE_BRANCH) {
			sciprintf("\n");
			for (i = 0; i < blanks; i++)
				sciprintf(" ");
			sciprintf("(");
			_vocab_recursive_ptree_dump(nodes, lbranch, nr, blanks + 1);
			sciprintf(")\n");
			for (i = 0; i < blanks; i++)
				sciprintf(" ");
		} else
			sciprintf("%x", nodes[lbranch].content.value);
		sciprintf(" ");
	}/* else sciprintf ("nil");*/

	if (rbranch) {
		if (nodes[rbranch].type == PARSE_TREE_NODE_BRANCH)
			_vocab_recursive_ptree_dump(nodes, rbranch, nr, blanks);
		else
			sciprintf("%x", nodes[rbranch].content.value);
	}/* else sciprintf("nil");*/
}
|
|
|
|
/**
 * Prints a complete parse tree as a "(setq <name> '( ... ))" expression.
 *
 * @param tree_name	name printed in the setq expression
 * @param nodes		node array of the tree; printing starts at node 0
 */
void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes) {
	const int rootNode = 0;
	const int initialBlanks = 1;

	// Alternative output format:
	//_vocab_recursive_ptree_dump_treelike(nodes, 0, 0);

	sciprintf("(setq %s \n'(", tree_name);
	_vocab_recursive_ptree_dump(nodes, rootNode, rootNode, initialBlanks);
	sciprintf("))\n");
}
|
|
|
|
/**
 * Applies the synonym list to a list of tokenized words.
 *
 * For every word the synonyms are visited in list order; whenever the word's
 * current group equals a synonym's 'replaceant', the group is replaced by
 * that synonym's 'replacement'. The scan continues after a replacement, so
 * later synonyms are compared against the updated group.
 *
 * @param words		words whose groups are rewritten in place
 * @param synonyms	synonym rules to apply
 */
void vocab_synonymize_tokens(ResultWordList &words, const SynonymList &synonyms) {
	// No synonyms: Nothing to check
	if (synonyms.empty())
		return;

	ResultWordList::iterator token = words.begin();
	while (token != words.end()) {
		SynonymList::const_iterator rule = synonyms.begin();
		while (rule != synonyms.end()) {
			if (token->_group == rule->replaceant)
				token->_group = rule->replacement;
			++rule;
		}
		++token;
	}
}
|
|
|
|
} // End of namespace Sci
|