mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-01 06:35:42 +00:00
349 lines
9.7 KiB
C
349 lines
9.7 KiB
C
|
|
/* W3 Copyright statement
|
|
Copyright 1995 by: Massachusetts Institute of Technology (MIT), INRIA</H2>
|
|
|
|
This W3C software is being provided by the copyright holders under the
|
|
following license. By obtaining, using and/or copying this software,
|
|
you agree that you have read, understood, and will comply with the
|
|
following terms and conditions:
|
|
|
|
Permission to use, copy, modify, and distribute this software and its
|
|
documentation for any purpose and without fee or royalty is hereby
|
|
granted, provided that the full text of this NOTICE appears on
|
|
<EM>ALL</EM> copies of the software and documentation or portions
|
|
thereof, including modifications, that you make.
|
|
|
|
<B>THIS SOFTWARE IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE NO
|
|
REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE,
|
|
BUT NOT LIMITATION, COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS OR
|
|
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR
|
|
THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY
|
|
THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
|
|
COPYRIGHT HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE
|
|
OR DOCUMENTATION.
|
|
|
|
The name and trademarks of copyright holders may NOT be used
|
|
in advertising or publicity pertaining to the software without
|
|
specific, written prior permission. Title to copyright in this
|
|
software and any associated documentation will at all times remain
|
|
with copyright holders.
|
|
*/
|
|
/* Parser for libpics
|
|
PARSER FOR LIBPICS
|
|
|
|
*/
|
|
/*
|
|
** (c) COPYRIGHT MIT 1996.
|
|
** Please first read the full copyright statement in the file COPYRIGH.
|
|
*/
|
|
/*

This module provides the interface to CSParse.c. The parser is used to parse labels,
machine-readable descriptions, and users. The application creates one of these and
iteratively calls CSParse_parseChunk until it returns a done or an error.

*/
|
|
#ifndef CSPARSE_H
|
|
#define CSPARSE_H
|
|
#include "cslutils.h"
|
|
#include "htchunk.h"
|
|
/*
|
|
|
|
NOWIN
|
|
|
|
tells CSParse where it is in the task of tokenizing
|
|
|
|
*/
|
|
/*
 * NowIn_t - tells CSParse where it is in the task of tokenizing.
 *
 * The enumerators are plain sequential values starting at
 * NowIn_INVALID == 0 (so a zero-filled parser reads as "invalid").
 * It is also the return type of every Engine_t function.
 */
typedef enum {
    NowIn_INVALID = 0,
    NowIn_NEEDOPEN,
    NowIn_ENGINE,
    NowIn_NEEDCLOSE,
    NowIn_END,
    NowIn_MATCHCLOSE,
    NowIn_ERROR,
    NowIn_CHAIN
} NowIn_t;
|
|
/*
|
|
|
|
Construction/Destruction
|
|
|
|
The parse objects are never created by the application, but instead by one of the
|
|
objects that it is used to parse.
|
|
|
|
*/
|
|
/*
 * Construction / destruction of a parse object.
 *
 * These are meant to be called by the object a parser is created for
 * (label, machine-readable description, user), not by the application.
 * NOTE(review): allocation failure behaviour of CSParse_new and NULL
 * handling in CSParse_delete are defined in CSParse.c - confirm there.
 */
extern CSParse_t * CSParse_new(void);
extern void CSParse_delete(CSParse_t * me);
|
|
/*
|
|
|
|
some handy definitions
|
|
|
|
*/
|
|
/* Character constants for the punctuation the grammar cares about. */
#define LPAREN '('
#define RPAREN ')'
#define LCURLY '{'
#define RCURLY '}'
#define LBRACKET '['
#define RBRACKET ']'
#define SQUOTE 0x27 /* avoid confusing parens checking editors */
#define DQUOTE 0x22

/* The same parentheses as string literals. */
#define LPARENSTR "("
#define RPARENSTR ")"

/*
 * raysize(A) - number of elements in the array A.
 * Only meaningful for a true array object; a pointer (including an
 * array function parameter) yields sizeof(pointer)/sizeof(element).
 * The argument is parenthesized so any array-valued expression can be
 * passed safely.
 */
#define raysize(A) (sizeof(A)/sizeof((A)[0]))
|
|
/*
|
|
|
|
SUBPARSER DATA
|
|
|
|
PUNCT
|
|
|
|
valid punctuation
|
|
|
|
*/
|
|
/*
 * Punct_t - set of punctuation that is valid after a token.
 *
 * The base values are single bits; the combined names are just the
 * bitwise OR of the bits they mention, and Punct_ALL is all four bits.
 */
typedef enum {
    Punct_ZERO   = 1,
    Punct_WHITE  = 2,
    Punct_LPAREN = 4,
    Punct_RPAREN = 8,

    Punct_WHITE_LPAREN        = (Punct_WHITE|Punct_LPAREN),
    Punct_LPAREN_RPAREN       = (Punct_LPAREN|Punct_RPAREN),
    Punct_WHITE_LPAREN_RPAREN = (Punct_WHITE|Punct_LPAREN|Punct_RPAREN),

    Punct_ALL = 0xf
} Punct_t;
|
|
/*

SUBSTATE

Enumerated bits that are used to mark a parsing state. Because they are bits, as
opposed to sequential numbers, a StateToken may OR more than one together and serve
more than one state. They must have identical outcomes if this is to be exploited.

By convention, the following SubState names are used:

X - has no state

N - is a newly created object

A-I - substate definitions. Because they are non-conflicting bits, a subparser may have
options that sit in more than one state. For instance, the string "error" may be matched
in states A and C with:

{"error test", SubState_A|SubState_C, Punct_LPAREN, 0, "error"}

*probs* I meant to keep these 16 bit compatible, but ran up short at the end of one
StateToken list. This can be fixed if anyone needs a 16 bit enum.

*/
|
|
/*
 * SubState_t - bit flags marking a parsing sub-state.
 *
 * SubState_A .. SubState_I are distinct single bits so that several can
 * be OR-ed together in one StateToken (e.g. SubState_A|SubState_C).
 * SubState_N has its own bit (0x4000); SubState_X is -1.
 */
typedef enum {
    SubState_X = -1,
    SubState_N = 0x4000,
    SubState_A = 1,
    SubState_B = 2,
    SubState_C = 4,
    SubState_D = 8,
    SubState_E = 0x10,
    SubState_F = 0x20,
    SubState_G = 0x40,
    SubState_H = 0x80,
    SubState_I = 0x100
} SubState_t;
|
|
/*
|
|
|
|
forward declaration for StateToken_t
|
|
|
|
*/
|
|
/* Incomplete-type typedef: Check_t takes a StateToken_t pointer before
   struct StateToken_s is defined further down in this header. */
typedef struct StateToken_s StateToken_t;
|
|
/*
|
|
|
|
ENGINE
|
|
|
|
called by CSParse to process tokens and punctuation
|
|
|
|
*/
|
|
/* Engine_t is a function type (not a pointer type): engines are declared
   as "Engine_t name;" and stored as "Engine_t *".
   pCSParse - the parser being driven
   demark   - the punctuation character accompanying the current token
   pVoid    - opaque argument; NOTE(review): its meaning is engine-specific,
              confirm against CSParse.c
   Returns a NowIn_t. */
typedef NowIn_t Engine_t(CSParse_t * pCSParse, char demark, void * pVoid);
|
|
/*
|
|
|
|
Engine employed by the Label, MacRed, and User parsers
|
|
|
|
*/
|
|
/* Function declaration written through the Engine_t function typedef;
   equivalent to
   NowIn_t CSParse_targetParser(CSParse_t *, char, void *); */
Engine_t CSParse_targetParser;
|
|
/*
|
|
|
|
SUBSTATE METHODS
|
|
|
|
All methods return a StateRet.
|
|
|
|
Check
|
|
|
|
see if a value is legitimate, may also record it
|
|
|
|
*/
|
|
/*
 * Check_t - function type for a StateToken's pCheck hook: see if a value
 * is legitimate; the hook may also record it.
 *
 *   pCSParse    - the parser being driven
 *   pStateToken - the StateToken whose pCheck is being called
 *   token       - the token text to examine
 *   demark      - the punctuation character accompanying the token
 *
 * Returns a StateRet_t.
 */
typedef StateRet_t Check_t(CSParse_t * pCSParse, StateToken_t * pStateToken,
                           char * token, char demark);
|
|
/*
|
|
|
|
Punctuation checker to be employed by Check_t functions
|
|
|
|
*/
|
|
/* Helper for Check_t implementations: tests demark against the set of
   punctuation bits in validPunctuation.
   NOTE(review): the name suggests a true result means demark is NOT
   allowed - confirm against the definition in CSParse.c. */
extern BOOL Punct_badDemark(Punct_t validPunctuation, char demark);
|
|
/*
|
|
|
|
Open
|
|
|
|
create a new data structure to be filled by the parser
|
|
|
|
*/
|
|
/* Open_t - function type for TargetObject_s.pOpen.
   Receives the parser, the current token text and its punctuation
   character; returns a StateRet_t. */
typedef StateRet_t Open_t(CSParse_t * pCSParse, char * token, char demark);
|
|
/*
|
|
|
|
Close
|
|
|
|
tell the state that the data structure is no longer current
|
|
|
|
*/
|
|
/* Close_t - function type for TargetObject_s.pClose.
   Same parameter list as Open_t; returns a StateRet_t. */
typedef StateRet_t Close_t(CSParse_t * pCSParse, char * token, char demark);
|
|
/*
|
|
|
|
Prep
|
|
|
|
get ready for next state
|
|
|
|
*/
|
|
/* Prep_t - function type for StateToken_s.pPrep (prepare for next state).
   Same parameter list as Open_t and Close_t; returns a StateRet_t. */
typedef StateRet_t Prep_t(CSParse_t * pCSParse, char * token, char demark);
|
|
/*
|
|
|
|
Destroy
|
|
|
|
something went wrong, throw away the current object
|
|
|
|
*/
|
|
/* Destroy_t - function type for TargetObject_s.pDestroy.
   Takes only the parser and returns nothing, so it has no way to report
   failure to the caller. */
typedef void Destroy_t(CSParse_t * pCSParse);
|
|
/*
|
|
|
|
COMMAND
|
|
|
|
substate commands
|
|
|
|
open - call the open function for the current data structure
|
|
|
|
close - call the close
|
|
|
|
chain - call again on the next state without re-reading data
|
|
|
|
notoken - clear the token before a chain (so next state just gets punct)
|
|
|
|
matchany - match any string
|
|
|
|
*/
|
|
/*
 * Command_t - substate commands, stored in StateToken_s.command.
 *
 * Base flags are single bits:
 *   OPEN     - call the open function for the current data structure
 *   CLOSE    - call the close function
 *   CHAIN    - call again on the next state without re-reading data
 *   NOTOKEN  - clear the token before a chain (next state just gets punct)
 *   MATCHANY - match any string
 * The compound names are the bitwise OR of the flags they mention.
 */
typedef enum {
    Command_NONE     = 0,
    Command_OPEN     = 1,
    Command_CLOSE    = 2,
    Command_CHAIN    = 4,
    Command_NOTOKEN  = 8,
    Command_MATCHANY = 0x10,

    Command_CLOSE_CHAIN          = (Command_CLOSE|Command_CHAIN),
    Command_CHAIN_NOTOKEN        = (Command_CHAIN|Command_NOTOKEN),
    Command_CLOSE_CHAIN_NOTOKEN  = (Command_CLOSE|Command_CHAIN|Command_NOTOKEN),
    Command_MATCHANY_OPEN_CHAIN  = (Command_MATCHANY|Command_OPEN|Command_CHAIN),
    Command_MATCHANY_CLOSE       = (Command_MATCHANY|Command_CLOSE),
    Command_MATCHANY_CLOSE_CHAIN = (Command_MATCHANY|Command_CLOSE|Command_CHAIN)
} Command_t;
|
|
/*

STATETOKEN STRUCTURE

Contains all the information about what tokens are expected in what substates. The
StateTokens are kept in an array referenced by a TargetObject.

*/
|
|
struct StateToken_s {
|
|
char * note; /* some usefull text that describes the state - usefulll f
|
|
or debugging */
|
|
SubState_t validSubStates;
|
|
Punct_t validPunctuation;
|
|
Check_t * pCheck; /* call this function to check token */
|
|
char * name1; /* or compare to this name */
|
|
char * name2; /* many strings have 2 spellings ("ratings" vs. "r") */
|
|
CSParseTC_t targetChange; /* whether target change implies diving or climbing from cur
|
|
rent state */
|
|
TargetObject_t * pNextTargetObject;
|
|
SubState_t nextSubState;
|
|
Command_t command; /* open, close, chain, etc. */
|
|
Prep_t * pPrep; /* prepare for next state */
|
|
};
|
|
/*

TARGETOBJECT STRUCTURE

Methods and a list of StateTokens associated with a data structure. The methods know
how to read data into the current object and the StateTokens tell when to proceed to
the next object.

*/
|
|
struct TargetObject_s {
|
|
char * note;
|
|
Open_t * pOpen; /* call this function to open structure */
|
|
Close_t * pClose; /* call this function to close structure */
|
|
Destroy_t * pDestroy;
|
|
StateToken_t * stateTokens; /* array of sub states */
|
|
int stateTokenCount; /* number of sub states */
|
|
CSParseTC_t targetChange; /* target change signal for opening this parse state */
|
|
};
|
|
/*
|
|
|
|
VALTARGET
|
|
|
|
*/
|
|
typedef union {
|
|
BVal_t * pTargetBVal;
|
|
FVal_t * pTargetFVal;
|
|
SVal_t * pTargetSVal;
|
|
DVal_t * pTargetDVal;
|
|
HTList ** pTargetList;
|
|
} ValTarget_t;
|
|
/*
|
|
|
|
VALTYPE
|
|
|
|
Write down what value is to be read, and what type it is
|
|
|
|
*/
|
|
/*
 * ValType_t - what type of value is to be read next.
 * Sequential values starting at ValType_NONE == 0.
 */
typedef enum {
    ValType_NONE,
    ValType_BVAL,
    ValType_FVAL,
    ValType_SVAL,
    ValType_DVAL,
    ValType_COMMENT
} ValType_t;
|
|
/*

PARSECONTEXT

Part of a CSParse. The boundary is a little fuzzy. Maybe it should not exist.

*/
|
|
typedef struct {
|
|
Engine_t * engineOf;
|
|
TargetChangeCallback_t * pTargetChangeCallback;
|
|
ParseErrorHandler_t * pParseErrorHandler;
|
|
|
|
/* for reading [BFSD]Val_t */
|
|
ValTarget_t valTarget;
|
|
ValType_t valType;
|
|
|
|
char * pTokenError;
|
|
|
|
BOOL observeQuotes;
|
|
BOOL observedQuotes;
|
|
char * legalChars;
|
|
int legalCharCount;
|
|
} ParseContext_t;
|
|
/*
|
|
|
|
CSPARSE STRUCTURE
|
|
|
|
Full parser state and pointer to the object that it is reading.
|
|
|
|
*/
|
|
struct CSParse_s {
|
|
char quoteState;
|
|
NowIn_t nowIn;
|
|
HTChunk * token;
|
|
char demark;
|
|
int offset;
|
|
int depth;
|
|
ParseContext_t * pParseContext;
|
|
union { /* all the types this parse engine fills */
|
|
CSMachRead_t * pCSMachRead; /* defined in CSMacRed.c */
|
|
CSLabel_t * pCSLabel; /* defined in CSLabel.c */
|
|
CSUser_t * pCSUser; /* defined in CSUser.c */
|
|
} target;
|
|
TargetObject_t * pTargetObject;
|
|
SubState_t currentSubState;
|
|
StateToken_t * pStateToken;
|
|
};
|
|
/*
|
|
|
|
*/
|
|
#endif /* CSPARSE_H */
|
|
/*
|
|
|
|
End of Declaration */
|