2009-02-17 15:05:16 +00:00
|
|
|
/* ScummVM - Graphic Adventure Engine
|
|
|
|
*
|
|
|
|
* ScummVM is the legal property of its developers, whose names
|
|
|
|
* are too numerous to list here. Please refer to the COPYRIGHT
|
|
|
|
* file distributed with this source distribution.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License
|
|
|
|
* as published by the Free Software Foundation; either version 2
|
|
|
|
* of the License, or (at your option) any later version.
|
|
|
|
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
*
|
|
|
|
* $URL$
|
|
|
|
* $Id$
|
|
|
|
*
|
|
|
|
*/
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-27 02:23:00 +00:00
|
|
|
#ifndef SCI_SCICORE_VOCABULARY_H
|
|
|
|
#define SCI_SCICORE_VOCABULARY_H
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-25 02:05:43 +00:00
|
|
|
#include "common/str.h"
|
2009-03-24 17:42:12 +00:00
|
|
|
#include "common/hashmap.h"
|
|
|
|
#include "common/hash-str.h"
|
2009-03-08 08:17:43 +00:00
|
|
|
#include "common/list.h"
|
2009-02-25 02:05:43 +00:00
|
|
|
|
2009-05-14 12:38:50 +00:00
|
|
|
#include "sci/sci.h"
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 10:23:36 +00:00
|
|
|
namespace Sci {
|
|
|
|
|
2009-02-28 20:45:36 +00:00
|
|
|
class ResourceManager;
|
2009-02-25 02:05:43 +00:00
|
|
|
|
2009-02-15 06:10:59 +00:00
|
|
|
/*#define VOCABULARY_DEBUG */
|
|
|
|
|
2009-05-26 14:44:14 +00:00
|
|
|
/** Number of bytes allocated on the heap to store bad words if parsing fails */
#define PARSE_HEAP_SIZE 64
|
|
|
|
|
2009-05-26 14:44:14 +00:00
|
|
|
/**
 * Resource numbers of the vocabulary resources.
 *
 * The SCI0 and SCI1 groups deliberately share numeric values (900 and 901
 * appear in both), so a value is only meaningful together with the
 * vocabulary version it is used for.
 */
enum {
	VOCAB_RESOURCE_SELECTORS = 997,

	VOCAB_RESOURCE_SCI0_MAIN_VOCAB = 0,
	VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES = 900,
	VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB = 901,

	VOCAB_RESOURCE_SCI1_MAIN_VOCAB = 900,
	VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES = 901,
	VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB = 902
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
|
|
|
|
2009-03-09 22:25:33 +00:00
|
|
|
/**
 * Word class flags. Each of the first eight values occupies its own bit.
 */
enum {
	VOCAB_CLASS_PREPOSITION = 0x01,
	VOCAB_CLASS_ARTICLE = 0x02,
	VOCAB_CLASS_ADJECTIVE = 0x04,
	VOCAB_CLASS_PRONOUN = 0x08,
	VOCAB_CLASS_NOUN = 0x10,
	VOCAB_CLASS_INDICATIVE_VERB = 0x20,
	VOCAB_CLASS_ADVERB = 0x40,
	VOCAB_CLASS_IMPERATIVE_VERB = 0x80,
	// NOTE(review): 0x001 is numerically identical to VOCAB_CLASS_PREPOSITION
	// (0x01). The three-digit spelling suggests 0x100 may have been intended;
	// confirm against the parser before changing the value.
	VOCAB_CLASS_NUMBER = 0x001
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-09-30 12:17:38 +00:00
|
|
|
/**
 * Parser token kinds, numbered consecutively from 0.
 */
enum {
	kParseEndOfInput = 0,
	kParseOpeningParenthesis = 1,
	kParseClosingParenthesis = 2,
	kParseNil = 3,
	kParseNumber = 4
};
|
|
|
|
|
2009-02-15 06:10:59 +00:00
|
|
|
/* Anywords are ignored by the parser */
#define VOCAB_CLASS_ANYWORD 0xff

/* This word group is used for numbers */
#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */

/* Number of nodes in the parse tree array (see Vocabulary::_parserNodes) */
#define VOCAB_TREE_NODES 500

/* Special parse tree node values */
#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
|
|
|
|
|
|
|
|
/*
 * Said-spec tokens. Judging by their names these stand for
 * , & / ( ) [ ] # < > and a terminator -- TODO confirm against the said
 * block decoder.
 */
#define SAID_COMMA 0xf0
#define SAID_AMP 0xf1
#define SAID_SLASH 0xf2
#define SAID_PARENO 0xf3
#define SAID_PARENC 0xf4
#define SAID_BRACKO 0xf5
#define SAID_BRACKC 0xf6
#define SAID_HASH 0xf7
#define SAID_LT 0xf8
#define SAID_GT 0xf9
#define SAID_TERM 0xff

/* Lowest-valued said token; equals SAID_COMMA */
#define SAID_FIRST SAID_COMMA

/* There was no 'last matching word': */
#define SAID_FULL_MATCH 0xffff
#define SAID_NO_MATCH 0xfffe
#define SAID_PARTIAL_MATCH 0xfffd

/* Shifts a value left by 8 bits, i.e. into the next-higher byte */
#define SAID_LONG(x) ((x) << 8)
|
|
|
|
|
2009-03-09 22:25:33 +00:00
|
|
|
/**
 * A word as produced by the vocabulary lookup: its class and its group.
 */
struct ResultWord {
	int _class; /**< Word class */
	int _group; /**< Word group */
};
|
|
|
|
|
|
|
|
/** List of ResultWord entries. */
typedef Common::List<ResultWord> ResultWordList;
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-03-24 17:42:12 +00:00
|
|
|
/** Maps a word string to its ResultWord, using the IgnoreCase hash and equality functors. */
typedef Common::HashMap<Common::String, ResultWord, Common::IgnoreCase_Hash, Common::IgnoreCase_EqualTo> WordMap;
|
2009-02-15 06:10:59 +00:00
|
|
|
|
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
struct ParseRuleList;
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 22:06:42 +00:00
|
|
|
/**
 * A suffix rule: describes how an alternative suffix relates to the suffix
 * used in the word vocabulary, and which word classes are involved.
 */
struct suffix_t {

	int class_mask; /**< the word class this suffix applies to */
	int result_class; /**< the word class a word is morphed to if it doesn't fail this check */

	int alt_suffix_length; /**< String length of the suffix */
	int word_suffix_length; /**< String length of the other suffix */

	const char *alt_suffix; /**< The alternative suffix */
	const char *word_suffix; /**< The suffix as used in the word vocabulary */

};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-03-08 08:17:43 +00:00
|
|
|
/** List of suffix_t entries. */
typedef Common::List<suffix_t> SuffixList;
|
|
|
|
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 22:06:42 +00:00
|
|
|
/**
 * A synonym entry: one word group and the word group that replaces it.
 */
struct synonym_t {
	int replaceant; /**< The word group to replace */
	int replacement; /**< The replacement word group for this one */
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-03-24 17:41:26 +00:00
|
|
|
/** List of synonym_t entries. */
typedef Common::List<synonym_t> SynonymList;
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 22:06:42 +00:00
|
|
|
/**
 * One grammar branch: an identifier plus a fixed block of ten data words.
 * NOTE(review): the meaning of the individual data[] entries is not visible
 * from this header -- see the branch loader for their encoding.
 */
struct parse_tree_branch_t {
	int id;
	int data[10];
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-06-06 16:43:13 +00:00
|
|
|
/**
 * Kind of a parse tree node (see parse_tree_node_t::type).
 */
enum ParseTypes {
	kParseTreeLeafNode = 0,
	kParseTreeBranchNode = 1
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 22:06:42 +00:00
|
|
|
struct parse_tree_node_t {
|
2009-10-19 22:13:51 +00:00
|
|
|
ParseTypes type; /**< leaf or branch */
|
2009-02-15 22:32:57 +00:00
|
|
|
union {
|
2009-10-19 22:13:51 +00:00
|
|
|
int value; /**< For leaves */
|
|
|
|
short branches[2]; /**< For branches */
|
2009-02-15 22:32:57 +00:00
|
|
|
} content;
|
2009-02-21 22:06:42 +00:00
|
|
|
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-05-31 12:05:49 +00:00
|
|
|
/**
 * Vocabulary resource layout in use (stored in Vocabulary::_vocabVersion).
 */
enum VocabularyVersions {
	kVocabularySCI0 = 0,
	kVocabularySCI1 = 1
};
|
|
|
|
|
2009-05-30 22:15:00 +00:00
|
|
|
class Vocabulary {
|
|
|
|
public:
|
2009-09-02 12:02:37 +00:00
|
|
|
Vocabulary(ResourceManager *resMan);
|
2009-05-30 22:15:00 +00:00
|
|
|
~Vocabulary();
|
|
|
|
|
2009-05-31 12:05:49 +00:00
|
|
|
/**
|
|
|
|
* Gets any word from the specified group. For debugging only.
|
|
|
|
* @param group Group number
|
|
|
|
*/
|
|
|
|
const char *getAnyWordFromGroup(int group);
|
|
|
|
|
|
|
|
|
2009-05-31 15:08:47 +00:00
|
|
|
/**
|
|
|
|
* Looks up a single word in the words and suffixes list.
|
|
|
|
* @param word pointer to the word to look up
|
|
|
|
* @param word_len length of the word to look up
|
|
|
|
* @return the matching word (or (-1,-1) if there was no match)
|
|
|
|
*/
|
2009-05-31 12:05:49 +00:00
|
|
|
ResultWord lookupWord(const char *word, int word_len);
|
|
|
|
|
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Tokenizes a string and compiles it into word_ts.
|
|
|
|
* @param[in] retval A list of words which will be set to the result
|
|
|
|
* @param[out] sentence The sentence to examine
|
|
|
|
* @param[out] error Points to a malloc'd copy of the offending text or to NULL on error
|
|
|
|
* @return true on success, false on failure
|
|
|
|
*
|
|
|
|
* On error, false is returned. If *error is NULL, the sentence did not
|
|
|
|
* contain any useful words; if not, *error points to a malloc'd copy of
|
|
|
|
* the offending word. The returned list may contain anywords.
|
|
|
|
*/
|
2009-05-31 12:05:49 +00:00
|
|
|
bool tokenizeString(ResultWordList &retval, const char *sentence, char **error);
|
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Builds a parse tree from a list of words, using a set of Greibach Normal
|
|
|
|
* Form rules.
|
|
|
|
* @param words The words to build the tree from
|
|
|
|
* @param verbose Set to true for debugging
|
|
|
|
* @return 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES
|
|
|
|
* nodes or if the sentence structure in 'words' is not part of the
|
|
|
|
* language described by the grammar passed in 'rules'.
|
|
|
|
*/
|
2009-09-30 12:17:38 +00:00
|
|
|
int parseGNF(const ResultWordList &words, bool verbose = false);
|
2009-05-31 12:05:49 +00:00
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Constructs the Greibach Normal Form of the grammar supplied in 'branches'.
|
|
|
|
* @param verbose Set to true for debugging. If true, the list is
|
|
|
|
* freed before the function ends
|
|
|
|
* @return Pointer to a list of singly linked GNF rules describing the same
|
|
|
|
* language that was described by 'branches'
|
|
|
|
*
|
|
|
|
* The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
|
|
|
|
* branch[0] is used only for a few magical incantations, as it is treated
|
|
|
|
* specially by the SCI parser.
|
|
|
|
*/
|
|
|
|
ParseRuleList *buildGNF(bool verbose = false);
|
2009-05-31 12:05:49 +00:00
|
|
|
|
2009-05-31 15:08:47 +00:00
|
|
|
/**
|
2009-07-06 10:39:22 +00:00
|
|
|
* Deciphers a said block and dumps its content via printf.
|
2009-05-31 15:08:47 +00:00
|
|
|
* For debugging only.
|
|
|
|
* @param pos pointer to the data to dump
|
|
|
|
*/
|
|
|
|
void decipherSaidBlock(byte *pos);
|
2009-05-31 12:05:49 +00:00
|
|
|
|
|
|
|
/**
|
2009-05-31 15:08:47 +00:00
|
|
|
* Prints the parser suffixes to the debug console.
|
2009-05-31 12:05:49 +00:00
|
|
|
*/
|
2009-05-31 15:08:47 +00:00
|
|
|
void printSuffixes() const;
|
2009-05-31 12:05:49 +00:00
|
|
|
|
|
|
|
/**
|
2009-05-31 15:08:47 +00:00
|
|
|
* Prints the parser words to the debug console.
|
2009-05-31 12:05:49 +00:00
|
|
|
*/
|
2009-05-31 15:08:47 +00:00
|
|
|
void printParserWords() const;
|
2009-05-31 12:05:49 +00:00
|
|
|
|
2009-05-31 15:08:16 +00:00
|
|
|
uint getParserBranchesSize() const { return _parserBranches.size(); }
|
|
|
|
const parse_tree_branch_t &getParseTreeBranch(int number) const { return _parserBranches[number]; }
|
2009-05-31 12:05:49 +00:00
|
|
|
|
2009-09-30 12:17:38 +00:00
|
|
|
/**
|
|
|
|
* Adds a new synonym to the list
|
|
|
|
*/
|
|
|
|
void addSynonym(synonym_t syn) { _synonyms.push_back(syn); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Clears the list of synonyms
|
|
|
|
*/
|
|
|
|
void clearSynonyms() { _synonyms.clear(); }
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Synonymizes a token list
|
|
|
|
* Parameters: (ResultWordList &) words: The word list to synonymize
|
|
|
|
*/
|
|
|
|
void synonymizeTokens(ResultWordList &words);
|
|
|
|
|
|
|
|
void printParserNodes(int num);
|
|
|
|
|
|
|
|
void dumpParseTree();
|
|
|
|
|
|
|
|
int parseNodes(int *i, int *pos, int type, int nr, int argc, const char **argv);
|
|
|
|
|
2009-05-31 12:05:49 +00:00
|
|
|
private:
|
2009-05-30 22:15:00 +00:00
|
|
|
/**
|
2009-05-31 15:08:47 +00:00
|
|
|
* Loads all words from the main vocabulary.
|
2009-05-30 22:15:00 +00:00
|
|
|
* @return true on success, false on failure
|
|
|
|
*/
|
2009-05-31 15:08:47 +00:00
|
|
|
bool loadParserWords();
|
2009-05-30 22:15:00 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Loads all suffixes from the suffix vocabulary.
|
|
|
|
* @return true on success, false on failure
|
|
|
|
*/
|
2009-05-31 15:08:47 +00:00
|
|
|
bool loadSuffixes();
|
2009-05-30 22:15:00 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Frees all suffixes in the given list.
|
|
|
|
* @param suffixes: The suffixes to free
|
|
|
|
*/
|
|
|
|
void freeSuffixes();
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Retrieves all grammar rules from the resource data.
|
|
|
|
* @param branches The rules are stored into this Array
|
|
|
|
* @return true on success, false on error
|
|
|
|
*/
|
2009-06-04 11:44:55 +00:00
|
|
|
bool loadBranches();
|
2009-05-30 22:15:00 +00:00
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Frees a parser rule list as returned by vocab_build_gnf().
|
|
|
|
* @param rule_list the rule list to free
|
|
|
|
*/
|
|
|
|
void freeRuleList(ParseRuleList *rule_list);
|
2009-05-31 02:37:24 +00:00
|
|
|
|
2009-09-02 12:02:37 +00:00
|
|
|
ResourceManager *_resMan;
|
2009-05-31 12:05:49 +00:00
|
|
|
VocabularyVersions _vocabVersion;
|
2009-05-31 02:37:24 +00:00
|
|
|
|
2009-05-31 12:05:49 +00:00
|
|
|
// Parser-related lists
|
2009-05-30 22:15:00 +00:00
|
|
|
SuffixList _parserSuffixes;
|
2009-10-19 22:13:51 +00:00
|
|
|
ParseRuleList *_parserRules; /**< GNF rules used in the parser algorithm */
|
2009-05-30 22:15:00 +00:00
|
|
|
Common::Array<parse_tree_branch_t> _parserBranches;
|
2009-05-31 12:05:49 +00:00
|
|
|
WordMap _parserWords;
|
2009-09-30 12:17:38 +00:00
|
|
|
SynonymList _synonyms; /**< The list of synonyms */
|
2009-10-19 22:13:51 +00:00
|
|
|
|
|
|
|
public:
|
|
|
|
// Accessed by said()
|
|
|
|
parse_tree_node_t _parserNodes[VOCAB_TREE_NODES]; /**< The parse tree */
|
2009-05-30 22:15:00 +00:00
|
|
|
};
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Prints a parse tree.
|
|
|
|
* @param tree_name Name of the tree to dump (free-form)
|
|
|
|
* @param nodes The nodes containing the parse tree
|
|
|
|
*/
|
2009-05-28 11:15:09 +00:00
|
|
|
void vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
|
2009-02-15 06:10:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
2009-10-19 22:13:51 +00:00
|
|
|
/**
|
|
|
|
* Builds a parse tree from a spec and compares it to a parse tree.
|
|
|
|
* @param s The affected state
|
|
|
|
* @param spec Pointer to the spec to build
|
|
|
|
* @param verbose Whether to display the parse tree after building it
|
|
|
|
* @return 1 on a match, 0 otherwise
|
|
|
|
*/
|
2009-09-30 12:17:38 +00:00
|
|
|
int said(EngineState *s, byte *spec, bool verbose);
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-21 10:23:36 +00:00
|
|
|
} // End of namespace Sci
|
2009-02-15 06:10:59 +00:00
|
|
|
|
2009-02-27 02:23:00 +00:00
|
|
|
#endif // SCI_SCICORE_VOCABULARY_H
|