2009-02-15 06:10:59 +00:00
|
|
|
/***************************************************************************
|
|
|
|
vocabulary.h Copyright (C) 1999,2000,01 Christoph Reichenbach
|
|
|
|
|
|
|
|
|
|
|
|
This program may be modified and copied freely according to the terms of
|
|
|
|
the GNU general public license (GPL), as long as the above copyright
|
|
|
|
notice and the licensing information contained herein are preserved.
|
|
|
|
|
|
|
|
Please refer to www.gnu.org for licensing details.
|
|
|
|
|
|
|
|
This work is provided AS IS, without warranty of any kind, expressed or
|
|
|
|
implied, including but not limited to the warranties of merchantibility,
|
|
|
|
noninfringement, and fitness for a specific purpose. The author will not
|
|
|
|
be held liable for any damage caused by this work or derivatives of it.
|
|
|
|
|
|
|
|
By using this source code, you agree to the licensing terms as stated
|
|
|
|
above.
|
|
|
|
|
|
|
|
|
|
|
|
Please contact the maintainer for bug reports or inquiries.
|
|
|
|
|
|
|
|
Current Maintainer:
|
|
|
|
|
|
|
|
Christoph Reichenbach (CJR) [jameson@linuxgames.com]
|
|
|
|
|
|
|
|
***************************************************************************/
|
|
|
|
|
|
|
|
#ifndef VOCABULARY_H
|
|
|
|
#define VOCABULARY_H
|
|
|
|
|
2009-02-15 08:34:13 +00:00
|
|
|
#include "sci/include/versions.h"
|
|
|
|
#include "sci/include/sciresource.h"
|
2009-02-15 06:10:59 +00:00
|
|
|
|
|
|
|
/*#define VOCABULARY_DEBUG */
|
|
|
|
/*#define SCI_SIMPLE_SAID_CODE */ /* Whether the simplified Said() matching should be used */
|
|
|
|
/*#define SCI_SIMPLE_SAID_DEBUG */ /* uncomment to enable simple said debugging */
|
|
|
|
|
|
|
|
|
|
|
|
#define SCRIPT_UNKNOWN_FUNCTION_STRING "[Unknown]"
|
|
|
|
/* The string used to identify the "unknown" SCI0 function for each game */
|
|
|
|
|
|
|
|
#define PARSE_HEAP_SIZE 64
|
|
|
|
/* Number of bytes allocated on the heap to store bad words if parsing fails */
|
|
|
|
|
|
|
|
|
|
|
|
/* One entry of the opcode table handled by vocabulary_get_opcodes() and
** vocabulary_free_opcodes().
** NOTE(review): the exact meaning of 'type' and 'number' is set by the code
** that fills the table, which is not visible from this header -- confirm there.
*/
typedef struct opcode_ {
	int type;
	int number;
	char *name; /* Opcode name */
} opcode;
|
|
|
|
|
|
|
|
#define VOCAB_RESOURCE_OPCODES 998
|
|
|
|
#define VOCAB_RESOURCE_KNAMES 999
|
|
|
|
|
|
|
|
#define VOCAB_RESOURCE_SCI0_MAIN_VOCAB 0
|
|
|
|
#define VOCAB_RESOURCE_SCI0_PARSE_TREE_BRANCHES 900
|
|
|
|
#define VOCAB_RESOURCE_SCI0_SUFFIX_VOCAB 901
|
|
|
|
|
|
|
|
#define VOCAB_RESOURCE_SCI1_MAIN_VOCAB 900
|
|
|
|
#define VOCAB_RESOURCE_SCI1_PARSE_TREE_BRANCHES 901
|
|
|
|
#define VOCAB_RESOURCE_SCI1_SUFFIX_VOCAB 902
|
|
|
|
#define VOCAB_RESOURCE_SCI1_CHAR_TRANSFORMS 913
|
|
|
|
|
|
|
|
#define VOCAB_CLASS_PREPOSITION 0x01
|
|
|
|
#define VOCAB_CLASS_ARTICLE 0x02
|
|
|
|
#define VOCAB_CLASS_ADJECTIVE 0x04
|
|
|
|
#define VOCAB_CLASS_PRONOUN 0x08
|
|
|
|
#define VOCAB_CLASS_NOUN 0x10
|
|
|
|
#define VOCAB_CLASS_INDICATIVE_VERB 0x20
|
|
|
|
#define VOCAB_CLASS_ADVERB 0x40
|
|
|
|
#define VOCAB_CLASS_IMPERATIVE_VERB 0x80
|
|
|
|
#define VOCAB_CLASS_NUMBER 0x001
|
|
|
|
|
|
|
|
extern DLLEXTERN const char *class_names[]; /* Vocabulary class names */
|
|
|
|
|
|
|
|
#define VOCAB_CLASS_ANYWORD 0xff
|
|
|
|
/* Anywords are ignored by the parser */
|
|
|
|
|
|
|
|
#define VOCAB_MAGIC_NUMBER_GROUP 0xffd /* 0xffe ? */
|
|
|
|
/* This word group is used for numbers */
|
|
|
|
|
|
|
|
#define VOCAB_TREE_NODES 500
|
|
|
|
/* Number of nodes for each parse_tree_node structure */
|
|
|
|
|
|
|
|
#define VOCAB_TREE_NODE_LAST_WORD_STORAGE 0x140
|
|
|
|
#define VOCAB_TREE_NODE_COMPARE_TYPE 0x146
|
|
|
|
#define VOCAB_TREE_NODE_COMPARE_GROUP 0x14d
|
|
|
|
#define VOCAB_TREE_NODE_FORCE_STORAGE 0x154
|
|
|
|
|
|
|
|
#define SAID_COMMA 0xf0
|
|
|
|
#define SAID_AMP 0xf1
|
|
|
|
#define SAID_SLASH 0xf2
|
|
|
|
#define SAID_PARENO 0xf3
|
|
|
|
#define SAID_PARENC 0xf4
|
|
|
|
#define SAID_BRACKO 0xf5
|
|
|
|
#define SAID_BRACKC 0xf6
|
|
|
|
#define SAID_HASH 0xf7
|
|
|
|
#define SAID_LT 0xf8
|
|
|
|
#define SAID_GT 0xf9
|
|
|
|
#define SAID_TERM 0xff
|
|
|
|
|
|
|
|
#define SAID_FIRST SAID_COMMA
|
|
|
|
|
|
|
|
/* There was no 'last matching word': */
|
|
|
|
#define SAID_FULL_MATCH 0xffff
|
|
|
|
#define SAID_NO_MATCH 0xfffe
|
|
|
|
#define SAID_PARTIAL_MATCH 0xfffd
|
|
|
|
|
|
|
|
/* Shifts x left by 8 bits */
#define SAID_LONG(x) ((x) << 8)
|
|
|
|
|
|
|
|
/* A word as returned by vocab_get_words().
** 'word' is declared with a single element.
** NOTE(review): presumably the structure is over-allocated so that the whole
** string fits behind it -- confirm against the allocating code before
** changing the declaration.
*/
typedef struct {
	int w_class; /* Word class */
	int group; /* Word group */
	char word[1]; /* The actual word */
} word_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* A single parser rule, as referenced from parse_rule_list_t. */
typedef struct {
	int id; /* non-terminal ID */
	int first_special; /* first terminal or non-terminal */
	int specials_nr; /* number of terminals and non-terminals */
	int length; /* NOTE(review): presumably the number of used entries in 'data' -- confirm */
	int data[1]; /* actual data (size 1 to avoid compiler warnings) */
} parse_rule_t;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct _parse_rule_list {
|
|
|
|
int terminal; /* Terminal character this rule matches against or 0 for a non-terminal rule */
|
|
|
|
parse_rule_t *rule;
|
|
|
|
struct _parse_rule_list *next;
|
|
|
|
} parse_rule_list_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* A suffix entry as returned by vocab_get_suffices(). */
typedef struct {
	int class_mask; /* the word class this suffix applies to */
	int result_class; /* the word class a word is morphed to if it doesn't fail this check */

	int alt_suffix_length; /* String length of the suffix */
	int word_suffix_length; /* String length of the other suffix */

	char *alt_suffix; /* The alternative suffix */
	char *word_suffix; /* The suffix as used in the word vocabulary */
} suffix_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* Class/group pair describing a looked-up word (see vocab_lookup_word()
** and vocab_tokenize_string()).
*/
typedef struct {
	int w_class; /* Word class */
	int group; /* Word group */
} result_word_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* One synonym mapping, as consumed by vocab_synonymize_tokens(). */
typedef struct {
	int replaceant; /* The word group to replace */
	int replacement; /* The replacement word group for this one */
} synonym_t;
|
|
|
|
|
|
|
|
|
|
|
|
/* A grammar rule as returned by vocab_get_branches(): an ID followed by a
** fixed block of ten data entries.
*/
typedef struct {
	int id;
	int data[10];
} parse_tree_branch_t;
|
|
|
|
|
|
|
|
#define PARSE_TREE_NODE_LEAF 0
|
|
|
|
#define PARSE_TREE_NODE_BRANCH 1
|
|
|
|
|
|
|
|
|
|
|
|
/* One node of a parse tree. 'type' selects which member of 'content' is
** meaningful (see PARSE_TREE_NODE_LEAF and PARSE_TREE_NODE_BRANCH).
*/
typedef struct {
	short type; /* leaf or branch */

	union {
		int value; /* For leaves */
		short branches[2]; /* For branches */
	} content;
} parse_tree_node_t;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*FIXME: These need freeing functions...*/
|
|
|
|
|
|
|
|
int* vocabulary_get_classes(resource_mgr_t *resmgr, int *count);
|
|
|
|
|
|
|
|
int vocabulary_get_class_count(resource_mgr_t *resmgr);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a null terminated array of selector names.
|
|
|
|
*/
|
|
|
|
char** vocabulary_get_snames(resource_mgr_t *resmgr, int *pcount, sci_version_t version);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Frees the aforementioned array
|
|
|
|
*/
|
|
|
|
void vocabulary_free_snames(char **snames_list);
|
|
|
|
|
|
|
|
/* Look up a selector name in an array, return the index */
|
|
|
|
int vocabulary_lookup_sname(char **snames_list, char *sname);
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a null terminated array of opcodes.
|
|
|
|
*/
|
|
|
|
opcode* vocabulary_get_opcodes(resource_mgr_t *resmgr);
|
|
|
|
|
|
|
|
void
|
|
|
|
vocabulary_free_opcodes(opcode *opcodes);
|
|
|
|
/* Frees a previously allocated list of opcodes
|
|
|
|
** Parameters: (opcode *) opcodes: Opcodes to free
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns a null terminated array of kernel function names.
|
|
|
|
*
|
|
|
|
* This function reads the kernel function name table via resmgr,
|
|
|
|
* and returns a null terminated array of deep copies of them.
|
|
|
|
* The returned array has the same format regardless of the format of the
|
|
|
|
* name table of the resource (the format changed between version 0 and 1).
|
|
|
|
*/
|
|
|
|
char** vocabulary_get_knames(resource_mgr_t *resmgr, int* count);
|
|
|
|
void vocabulary_free_knames(char** names);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
word_t **
|
|
|
|
vocab_get_words(resource_mgr_t *resmgr, int *word_counter);
|
|
|
|
/* Gets all words from the main vocabulary
|
|
|
|
** Parameters: (resource_mgr_t *) resmgr: The resource manager to read from
|
|
|
|
** (int *) word_counter: The int which the number of words is stored in
|
|
|
|
** Returns : (word_t **): A list of all words, dynamically allocated
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_free_words(word_t **words, int words_nr);
|
|
|
|
/* Frees memory allocated by vocab_get_words
|
|
|
|
** Parameters: (word_t **) words: The words to free
|
|
|
|
** (int) words_nr: Number of words in the structure
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
suffix_t **
|
|
|
|
vocab_get_suffices(resource_mgr_t *resmgr, int *suffices_nr);
|
|
|
|
/* Gets all suffixes from the suffix vocabulary
|
|
|
|
** Parameters: (resource_mgr_t*) resmgr: Resource manager the resources are
|
|
|
|
** read from
|
|
|
|
** (int *) suffices_nr: The variable to store the number of suffices in
|
|
|
|
** Returns : (suffix_t **): A list of suffixes
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_free_suffices(resource_mgr_t *resmgr, suffix_t **suffices, int suffices_nr);
|
|
|
|
/* Frees suffices_nr suffices
|
|
|
|
** Parameters: (resource_mgr_t *) resmgr: The resource manager to free from
|
|
|
|
** (suffix_t **) suffices: The suffixes to free
|
|
|
|
** (int) suffices_nr: Number of entries in suffices
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
parse_tree_branch_t *
|
|
|
|
vocab_get_branches(resource_mgr_t *resmgr, int *branches_nr);
|
|
|
|
/* Retrieves all grammar rules from the resource data
|
|
|
|
** Parameters: (resource_mgr_t*) resmgr: Resource manager the rules are
|
|
|
|
** read from
|
|
|
|
** (int *) branches_nr: Pointer to the variable which the number of entries is to be
|
|
|
|
** stored in
|
|
|
|
** Returns : (parse_tree_branch_t *): The rules, or NULL on error
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_free_branches(parse_tree_branch_t *parser_branches);
|
|
|
|
/* Frees all branches
|
|
|
|
** Parameters: (parse_tree_branch_t *) parser_branches: The branches to free
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
result_word_t *
|
|
|
|
vocab_lookup_word(char *word, int word_len,
|
|
|
|
word_t **words, int words_nr,
|
|
|
|
suffix_t **suffices, int suffices_nr);
|
|
|
|
/* Looks up a single word in the words and suffixes list
|
|
|
|
** Parameters: (char *) word: Pointer to the word to look up
|
|
|
|
** (int) word_len: Length of the word to look up
|
|
|
|
** (word_t **) words: List of words
|
|
|
|
** (int) words_nr: Number of elements in 'words'
|
|
|
|
** (suffix_t **) suffices: List of suffices
|
|
|
|
** (int) suffices_nr: Number of entries in 'suffices'
|
|
|
|
** Returns : (result_word_t *) A malloc'd result_word_t, or NULL if the word
|
|
|
|
** could not be found.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
result_word_t *
|
|
|
|
vocab_tokenize_string(char *sentence, int *result_nr,
|
|
|
|
word_t **words, int words_nr,
|
|
|
|
suffix_t **suffices, int suffices_nr,
|
|
|
|
char **error);
|
|
|
|
/* Tokenizes a string and compiles it into result_word_ts.
|
|
|
|
** Parameters: (char *) sentence: The sentence to examine
|
|
|
|
** (int *) result_nr: The variable to store the resulting number of words in
|
|
|
|
** (word_t **) words: The words to scan for
|
|
|
|
** (int) words_nr: Number of words to scan for
|
|
|
|
** (suffix_t **) suffices: suffixes to scan for
|
|
|
|
** (int) suffices_nr: Number of suffices to scan for
|
|
|
|
** (char **) error: Points to a malloc'd copy of the offending text or to NULL on error
|
|
|
|
** Returns : (result_word_t *): A list of result_word_ts containing the result, or NULL.
|
|
|
|
** On error, NULL is returned. If *error is NULL, the sentence did not contain any useful words;
|
|
|
|
** if not, *error points to a malloc'd copy of the offending word.
|
|
|
|
** The returned list may contain anywords.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
parse_rule_list_t *
|
|
|
|
vocab_build_gnf(parse_tree_branch_t *branches, int branches_nr);
|
|
|
|
/* Constructs the Greibach Normal Form of the grammar supplied in 'branches'
|
|
|
|
** Parameters: (parse_tree_branch_t *) branches: The parser's branches
|
|
|
|
** (int) branches_nr: Number of parser branches
|
|
|
|
** Returns : (parse_rule_list_t *): Pointer to a list of singly linked
|
|
|
|
** GNF rules describing the same language
|
|
|
|
** that was described by 'branches'
|
|
|
|
** The original SCI rules are in almost-CNF (Chomsky Normal Form). Note that
|
|
|
|
** branch[0] is used only for a few magical incantations, as it is treated
|
|
|
|
** specially by the SCI parser.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_free_rule_list(parse_rule_list_t *rule_list);
|
|
|
|
/* Frees a parser rule list as returned by vocab_build_gnf()
|
|
|
|
** Parameters: (parse_rule_list_t *) rule_list: The rule list to free
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
vocab_build_parse_tree(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
|
|
|
|
parse_tree_branch_t *branch0, parse_rule_list_t *rules);
|
|
|
|
/* Builds a parse tree from a list of words
|
|
|
|
** Parameters: (parse_tree_node_t *) nodes: A node list to store the tree in (must have
|
|
|
|
** at least VOCAB_TREE_NODES entries)
|
|
|
|
** (result_word_t *) words: The words to build the tree from
|
|
|
|
** (int) words_nr: The number of words
|
|
|
|
** (parse_tree_branch_t *) branch0: The zeroth original branch of the
|
|
|
|
** original CNF parser grammar
|
|
|
|
** (parse_rule_list_t *) rules: The GNF ruleset to parse with
|
|
|
|
** Returns : 0 on success, 1 if the tree couldn't be built in VOCAB_TREE_NODES nodes
|
|
|
|
** or if the sentence structure in 'words' is not part of the language
|
|
|
|
** described by the grammar passed in 'rules'.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_dump_parse_tree(const char *tree_name, parse_tree_node_t *nodes);
|
|
|
|
/* Prints a parse tree
|
|
|
|
** Parameters: (const char *) tree_name: Name of the tree to dump (free-form)
|
|
|
|
** (parse_tree_node_t *) nodes: The nodes containing the parse tree
|
|
|
|
** Returns : (void)
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct _state;
|
|
|
|
|
|
|
|
int
|
|
|
|
said(struct _state *s, byte *spec, int verbose);
|
|
|
|
/* Builds a parse tree from a spec and compares it to a parse tree
|
|
|
|
** Parameters: (state_t *) s: The affected state
|
|
|
|
** (byte *) spec: Pointer to the spec to build
|
|
|
|
** (int) verbose: Whether to display the parse tree after building it
|
|
|
|
** Returns : (int) 1 on a match, 0 otherwise
|
|
|
|
*/
|
|
|
|
|
|
|
|
const char *
|
|
|
|
vocab_get_any_group_word(int group, word_t **words, int words_nr);
|
|
|
|
/* Gets any word from the specified group.
|
|
|
|
** Parameters: (int) group: Group number.
|
|
|
|
** (word_t **) words: List of words
|
|
|
|
** (int) words_nr: Count of words in the list.
|
|
|
|
** For debugging only.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_decypher_said_block(struct _state *s, byte *pos);
|
|
|
|
/* Decyphers a said block and dumps its content via sciprintf.
|
|
|
|
** Parameters: (state_t *) s: The state to use
|
|
|
|
** (byte *) pos: Pointer to the data to dump
|
|
|
|
** For debugging only.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_synonymize_tokens(result_word_t *words, int words_nr, synonym_t *synonyms, int synonyms_nr);
|
|
|
|
/* Synonymizes a token list
|
|
|
|
** Parameters: (result_word_t *) words: The word list to synonymize
|
|
|
|
** (int) words_nr: Number of result_word_ts in the list
|
|
|
|
** (synonym_t *) synonyms: Synonym list
|
|
|
|
** (int) synonyms_nr: Number of synonyms in the list
|
|
|
|
*/
|
|
|
|
|
|
|
|
int
|
|
|
|
vocab_gnf_parse(parse_tree_node_t *nodes, result_word_t *words, int words_nr,
|
|
|
|
parse_tree_branch_t *branch0, parse_rule_list_t *tlist, int verbose);
|
|
|
|
|
|
|
|
void
|
|
|
|
vocab_gnf_dump(parse_tree_branch_t *branches, int branches_nr);
|
|
|
|
|
|
|
|
|
|
|
|
#endif
|