Began switch to regexp bytecode.

This commit is contained in:
rogerl%netscape.com 2002-01-23 01:31:39 +00:00
parent c5398cc6c3
commit 386dda3703
4 changed files with 540 additions and 62 deletions

View File

@ -2416,7 +2416,7 @@ JSValue RegExp_exec(Context *cx, const JSValue& thisValue, JSValue *argv, uint32
REState *regexp_result = REExecute(parseResult, str->begin(), str->length());
if (regexp_result) {
JSArrayInstance *resultArray = (JSArrayInstance *)Array_Type->newInstance(cx);
String *matchStr = new String(str->substr(regexp_result->endIndex, regexp_result->length));
String *matchStr = new String(str->substr(regexp_result->startIndex, regexp_result->endIndex - regexp_result->startIndex));
resultArray->setProperty(cx, *numberToString(0), NULL, JSValue(matchStr));
String *parenStr = &cx->Empty_StringAtom;
for (uint32 i = 0; i < regexp_result->n; i++) {
@ -2430,17 +2430,16 @@ JSValue RegExp_exec(Context *cx, const JSValue& thisValue, JSValue *argv, uint32
}
// XXX SpiderMonkey also adds 'index' and 'input' properties to the result
resultArray->setProperty(cx, cx->Index_StringAtom, CURRENT_ATTR, JSValue((float64)(regexp_result->endIndex)));
resultArray->setProperty(cx, cx->Index_StringAtom, CURRENT_ATTR, JSValue((float64)(regexp_result->startIndex)));
resultArray->setProperty(cx, cx->Input_StringAtom, CURRENT_ATTR, JSValue(str));
result = JSValue(resultArray);
// XXX Set up the SpiderMonkey 'RegExp statics'
RegExp_Type->setProperty(cx, cx->LastMatch_StringAtom, CURRENT_ATTR, JSValue(matchStr));
RegExp_Type->setProperty(cx, cx->LastParen_StringAtom, CURRENT_ATTR, JSValue(parenStr));
String *contextStr = new String(str->substr(0, regexp_result->endIndex));
String *contextStr = new String(str->substr(0, regexp_result->startIndex));
RegExp_Type->setProperty(cx, cx->LeftContext_StringAtom, CURRENT_ATTR, JSValue(contextStr));
uint32 matchEnd = regexp_result->endIndex + regexp_result->length;
contextStr = new String(str->substr(matchEnd, str->length() - matchEnd));
contextStr = new String(str->substr(regexp_result->endIndex, str->length() - regexp_result->endIndex));
RegExp_Type->setProperty(cx, cx->RightContext_StringAtom, CURRENT_ATTR, JSValue(contextStr));
}
thisObj->setProperty(cx, cx->LastIndex_StringAtom, CURRENT_ATTR, JSValue((float64)(parseResult->lastIndex)));

View File

@ -126,7 +126,7 @@ static JSValue String_search(Context *cx, const JSValue& thisValue, JSValue *arg
parseResult->lastIndex = lastIndex;
if (regexp_result)
return JSValue((float64)(regexp_result->endIndex));
return JSValue((float64)(regexp_result->startIndex));
else
return JSValue(-1.0);
@ -159,7 +159,7 @@ static JSValue String_match(Context *cx, const JSValue& thisValue, JSValue *argv
break;
if (parseResult->lastIndex == index)
parseResult->lastIndex++;
String *matchStr = new String(S.string->substr(regexp_result->endIndex, regexp_result->length));
String *matchStr = new String(S.string->substr(regexp_result->startIndex, regexp_result->endIndex - regexp_result->startIndex));
A->setProperty(cx, *numberToString(index++), NULL, JSValue(matchStr));
}
regexp.object->setProperty(cx, cx->LastIndex_StringAtom, NULL, JSValue((float64)(parseResult->lastIndex)));
@ -175,14 +175,11 @@ static const String interpretDollar(Context *cx, const String *replaceStr, uint3
case '$':
return cx->Dollar_StringAtom;
case '&':
return searchStr->substr(regexp_result->endIndex, regexp_result->length);
return searchStr->substr(regexp_result->startIndex, regexp_result->endIndex - regexp_result->startIndex);
case '`':
return searchStr->substr(0, regexp_result->endIndex);
return searchStr->substr(0, regexp_result->startIndex);
case '\'':
{
uint32 matchEndIndex = regexp_result->endIndex + regexp_result->length;
return searchStr->substr(matchEndIndex, searchStr->length() - matchEndIndex);
}
return searchStr->substr(regexp_result->endIndex, searchStr->length() - regexp_result->endIndex);
case '0':
case '1':
case '2':
@ -253,12 +250,12 @@ static JSValue String_replace(Context *cx, const JSValue& thisValue, JSValue *ar
break;
}
}
newString += S.string->substr(index, regexp_result->endIndex - index);
newString += S.string->substr(index, regexp_result->startIndex - index);
newString += insertString;
}
else
break;
index = regexp_result->endIndex + regexp_result->length;
index = regexp_result->endIndex;
if ((parseResult->flags & GLOBAL) == 0)
break;
}
@ -268,10 +265,10 @@ static JSValue String_replace(Context *cx, const JSValue& thisValue, JSValue *ar
else {
const String *searchStr = searchValue.toString(cx).string;
REState regexp_result;
regexp_result.endIndex = S.string->find(*searchStr, 0);
if (regexp_result.endIndex == String::npos)
regexp_result.startIndex = S.string->find(*searchStr, 0);
if (regexp_result.startIndex == String::npos)
return JSValue(S.string);
regexp_result.length = searchStr->length();
regexp_result.endIndex = regexp_result.startIndex + searchStr->length();
regexp_result.n = 0;
String insertString;
String newString;
@ -289,9 +286,9 @@ static JSValue String_replace(Context *cx, const JSValue& thisValue, JSValue *ar
break;
}
}
newString += S.string->substr(0, regexp_result.endIndex);
newString += S.string->substr(0, regexp_result.startIndex);
newString += insertString;
uint32 index = regexp_result.endIndex + regexp_result.length;
uint32 index = regexp_result.endIndex;
newString += S.string->substr(index, S.string->length() - index);
return JSValue(new String(newString));
}
@ -330,7 +327,7 @@ static void regexpSplitMatch(const String *S, uint32 q, REParseState *RE, MatchR
REState *regexp_result = REMatch(RE, S->begin() + q, S->length() - q);
if (regexp_result) {
result.endIndex = regexp_result->endIndex + q;
result.endIndex = regexp_result->startIndex + q;
result.failure = false;
result.capturesCount = regexp_result->n;
if (regexp_result->n) {

View File

@ -155,6 +155,7 @@ typedef struct RENodeState {
RENode *node;
REint32 count;
REuint32 index;
REContinuationData continuation;
} RENodeState;
#define INITIAL_STATESTACK (20)
@ -164,7 +165,7 @@ REuint32 maxNodeStateStack;
/*
 * Data shared by the matcher routines for one execution: the flags of the
 * regular expression being run, the input text and its length, and an
 * error code.
 * NOTE(review): 'length' appears twice below with different types; this
 * looks like the before/after pair of a change from REuint32 to REint32
 * -- confirm which declaration is current.
 */
typedef struct REGlobalData {
REuint32 flags; /* flags from the RE in execution */
REuint32 length; /* length of input string */
REint32 length; /* length of input string */
const REchar *input; /* the input string */
REError error; /* runtime error code (out_of_memory only?) */
} REGlobalData;
@ -921,7 +922,7 @@ static REState *bolMatcher(REGlobalData *globalData, REState *x)
*/
static REState *eolMatcher(REGlobalData *globalData, REState *x)
{
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e != globalData->length) {
if (globalData->flags & MULTILINE) {
if (!RE_ISLINETERM(globalData->input[e]))
@ -997,7 +998,7 @@ static REState *wbndMatcher(REGlobalData *globalData, REState *x, REbool sense)
static REState *dotMatcher(REGlobalData *globalData, REState *x)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1016,7 +1017,7 @@ static REState *dotMatcher(REGlobalData *globalData, REState *x)
static REState *decMatcher(REGlobalData *globalData, REState *x, REbool sense)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1036,7 +1037,7 @@ static REState *decMatcher(REGlobalData *globalData, REState *x, REbool sense)
static REState *wsMatcher(REGlobalData *globalData, REState *x, REbool sense)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1058,7 +1059,7 @@ static REState *wsMatcher(REGlobalData *globalData, REState *x, REbool sense)
static REState *letdigMatcher(REGlobalData *globalData, REState *x, REbool sense)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1088,7 +1089,7 @@ and a Continuation c, and performs the following:
static REState *flatMatcher(REGlobalData *globalData, REState *x, REchar matchCh)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1102,7 +1103,7 @@ static REState *flatMatcher(REGlobalData *globalData, REState *x, REchar matchCh
static REState *flatIMatcher(REGlobalData *globalData, REState *x, REchar matchCh)
{
REchar ch;
REuint32 e = x->endIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
ch = globalData->input[e];
@ -1117,10 +1118,10 @@ static REState *flatIMatcher(REGlobalData *globalData, REState *x, REchar matchC
Consecutive literal characters.
*/
static REState *flatNMatcher(REGlobalData *globalData, REState *x,
REchar *matchChars, REuint32 length)
REchar *matchChars, REint32 length)
{
REuint32 e = x->endIndex;
REuint32 i;
REint32 e = x->endIndex;
REint32 i;
if ((e + length) > globalData->length)
return NULL;
for (i = 0; i < length; i++) {
@ -1132,10 +1133,10 @@ static REState *flatNMatcher(REGlobalData *globalData, REState *x,
}
static REState *flatNIMatcher(REGlobalData *globalData, REState *x,
REchar *matchChars, REuint32 length)
REchar *matchChars, REint32 length)
{
REuint32 e = x->endIndex;
REuint32 i;
REint32 e = x->endIndex;
REint32 i;
if ((e + length) > globalData->length)
return NULL;
for (i = 0; i < length; i++) {
@ -1371,8 +1372,8 @@ static REState *classMatcher(REGlobalData *globalData,
{
REchar ch;
CharSet *charSet;
REuint32 byteIndex;
REuint32 e = x->endIndex;
REint32 byteIndex;
REint32 e = x->endIndex;
if (e == globalData->length)
return NULL;
if (target->data.chclass.charSet->bits == NULL) {
@ -1381,7 +1382,7 @@ static REState *classMatcher(REGlobalData *globalData,
}
charSet = target->data.chclass.charSet;
ch = globalData->input[e];
byteIndex = (REuint32)(ch / 8);
byteIndex = ch / 8;
if (target->data.chclass.sense) {
if ((charSet->length == 0) ||
@ -1432,7 +1433,7 @@ static REState *backrefMatcher(REGlobalData *globalData,
{
REuint32 e;
REuint32 len;
REuint32 f;
REint32 f;
REuint32 i;
const REchar *parenContent;
RECapture *s = &x->parens[child->parenIndex];
@ -1498,6 +1499,487 @@ static void freeRENode(RENode *t)
}
}
#if 0
/*
 * EMIT_ARG writes (a) >> 8 to pc[0] and (a) to pc[1], then advances pc by
 * two elements. Presumably pc is a byte pointer, which would make this a
 * two-byte operand stored high byte first -- TODO confirm, pc is not
 * declared anywhere in view.
 * NOTE(review): 'a' is expanded twice and 'pc' is not parenthesized, so
 * arguments with side effects are unsafe.
 */
#define EMIT_ARG(pc, a) (pc[0] = ((a) >> 8), pc[1] = (a), pc += 2)
/*
 * Placeholder: defined with an empty replacement list, so a use written as
 * EMIT_BRANCH(pc); expands to the no-effect statement (pc); and no branch
 * operand is emitted yet.
 */
#define EMIT_BRANCH
/*
 * Draft emitter (inside a disabled #if 0 region): writes the opcode for
 * node 't' followed by its operands and, for composite nodes, the code for
 * its children.
 *
 * pState - parse state; its 'flags' and 'srcStart' members are read here.
 * t      - node to emit; t->kind is written first as the opcode.
 *
 * NOTE(review): 'pc' is used but not declared in this function, and the
 * children are emitted through emitRegExp(), not through this function --
 * confirm both against the rest of the file.
 */
void emitREBytecode(REParseState *pState, RENode *t)
{
*pc++ = t->kind;
switch (t->kind) {
case REOP_ALT:
EMIT_BRANCH(pc);
emitRegExp(pState, (RENode *)(t->child));
*pc++ = REOP_GOTO;
EMIT_BRANCH(pc);
t = (RENode *)(t->data.child2);
/* the nested-alternative case is still an empty stub */
if (t->kind == REOP_ALT) {
}
/*
 * NOTE(review): the else block opened on the next line is never closed
 * before the following 'break' and 'case', so as shown the function ends
 * one closing brace short.
 */
else {
emitRegExp(pState, (RENode *)(t->child));
*pc++ = REOP_GOTO;
break;
case REOP_FLAT:
/*
 * The opcode already written at pc[-1] is replaced by a more specific
 * one: the N forms when t->child is set, the single-character forms
 * otherwise, with the 'i' variants under IGNORECASE.
 */
if (t->child) {
if (pState->flags & IGNORECASE)
pc[-1] = REOP_FLATNi;
else
pc[-1] = REOP_FLATN;
/* operands: offset of the literal from srcStart, then its length */
EMIT_ARG(pc, t->child - pState->srcStart);
EMIT_ARG(pc, t->data.flat.length);
}
else { /* XXX original Monkey code separated ASCII and Unicode cases to save extra byte */
if (pState->flags & IGNORECASE)
pc[-1] = REOP_FLAT1i;
else
pc[-1] = REOP_FLAT1;
EMIT_ARG(pc, t->data.flat.ch);
}
break;
case REOP_PAREN:
/* paren index, child code, then REOP_CLOSEPAREN with the same index */
EMIT_ARG(pc, t->parenIndex);
emitRegExp(pState, (RENode *)(t->child));
*pc++ = REOP_CLOSEPAREN;
EMIT_ARG(pc, t->parenIndex);
break;
case REOP_STAR:
case REOP_PLUS:
case REOP_MINIMALSTAR:
case REOP_MINIMALPLUS:
/* no operands: the opcode is followed directly by the child code */
emitRegExp(pState, (RENode *)(t->child));
break;
case REOP_QUANT:
case REOP_MINIMALQUANT:
/* operands: minimum count, maximum count, then the child code */
EMIT_ARG(pc, t->data.quantifier.min);
EMIT_ARG(pc, t->data.quantifier.max);
emitRegExp(pState, (RENode *)(t->child));
break;
}
}
/*
 * Draft of the bytecode execution loop (inside a disabled #if 0 region).
 * As written it still dispatches on RENode kinds and follows t->next /
 * t->child links, while a few cases already read operands with
 * GET_ARGNO(pc).
 *
 * t          - first node to execute; t->kind seeds the dispatch opcode.
 * globalData - flags, input text, input length and error code for this run.
 * x          - current match state; its endIndex and parens are updated.
 *
 * Returns x when REOP_END is reached with a non-NULL state. Returns NULL
 * when the leading literal cannot be found, when pushBackTrack fails, when
 * classMatcher leaves globalData->error set, or when a step fails with no
 * backtrack entries left.
 *
 * NOTE(review): 'pc' and 'offset' are used but not declared in this
 * function, the anchor scan below has unbalanced braces, and the
 * 'case REOP_FLAT' label is truncated -- this cannot compile as shown.
 */
static REState *executeREBytecode(RENode *t, REGlobalData *globalData, REState *x)
{
REOp op = t->kind;
REContinuationData currentContinuation;
REState *result;
REBackTrackData *backTrackData;
REint32 k, length;
REbool anchor = false;
REchar anchorCh;
currentContinuation.node = NULL;
/*
 * If the first node is a literal match, step the index into
 * the string until that match is made, or fail if it can't be
 * found at all.
 */
switch (op) {
case REOP_FLAT1:
case REOP_FLAT1i:
anchorCh = GET_ARGNO(pc);
anchor = true;
break;
case REOP_FLATN:
case REOP_FLATNi:
k = GET_ARGNO(pc);
/* NOTE(review): no 'source' member appears in the REGlobalData typedef visible in this file -- confirm */
anchorCh = globalData->source[k];
anchor = true;
break;
}
if (anchor) {
anchor = false;
for (k = x->endIndex; k < globalData->length; k++) {
REchar matchCh = globalData->input[k];
/*
 * NOTE(review): this 'if' has no opening brace, so only the assignment
 * to x->endIndex is conditional; 'anchor = true' and 'break' run on the
 * first iteration regardless, and the closing braces that follow are
 * one more than were opened.
 */
if ((matchCh == anchorCh) ||
((globalData->flags & IGNORECASE)
&& (canonicalize(matchCh) == canonicalize(anchorCh))))
x->endIndex = k;
anchor = true;
break;
}
}
if (!anchor)
return NULL;
}
/*
 * Main dispatch loop. A case that ends in 'break' falls through to the
 * result check after the switch; a case that ends in 'continue' has
 * already selected the next node and opcode.
 */
while (true) {
switch (op) {
case REOP_EMPTY:
result = x;
break;
case REOP_BOL:
result = bolMatcher(globalData, x);
break;
case REOP_EOL:
result = eolMatcher(globalData, x);
break;
case REOP_WBND:
result = wbndMatcher(globalData, x, true);
break;
case REOP_UNWBND:
result = wbndMatcher(globalData, x, false);
break;
case REOP_DOT:
result = dotMatcher(globalData, x);
break;
case REOP_DEC:
result = decMatcher(globalData, x, true);
break;
case REOP_UNDEC:
result = decMatcher(globalData, x, false);
break;
case REOP_WS:
result = wsMatcher(globalData, x, true);
break;
case REOP_UNWS:
result = wsMatcher(globalData, x, false);
break;
case REOP_LETDIG:
result = letdigMatcher(globalData, x, true);
break;
case REOP_UNLETDIG:
result = letdigMatcher(globalData, x, false);
break;
case REOP_FLATN:
/* two operands are read: the first into k, the second into length */
k = GET_ARGNO(pc);
pc += ARGNO_LEN;
length = GET_ARGNO(pc);
pc += ARGNO_LEN;
/* NOTE(review): 'offset' is not declared; the first operand was read into k */
result = flatNMatcher(globalData, x, globalData->source + offset, length);
break;
case REOP_FLATNi:
k = GET_ARGNO(pc);
pc += ARGNO_LEN;
length = GET_ARGNO(pc);
pc += ARGNO_LEN;
/* NOTE(review): same undeclared 'offset' as in REOP_FLATN above */
result = flatNIMatcher(globalData, x, globalData->source + offset, length);
break;
/*
 * NOTE(review): the label below is truncated -- it has no ':' and the
 * 'if' branch that the following 'else' belongs to is missing.
 */
case REOP_FLAT
else
result = flatMatcher(globalData, x, t->data.flat.ch);
break;
case REOP_FLATi:
if (t->child)
result = flatNIMatcher(globalData, x, (REchar *)(t->child),
t->data.flat.length);
else
result = flatIMatcher(globalData, x, t->data.flat.ch);
break;
/* keep the current continuation and provide the alternate path
 * as a back track opportunity
 */
case REOP_ALT:
t->continuation = currentContinuation;
currentContinuation.node = t;
currentContinuation.op = REOP_NEXTALT;
if (!pushBackTrack(globalData, REOP_NEXTALT, t, x)) return NULL;
t = (RENode *)(t->child);
ASSERT(t);
op = t->kind;
continue;
case REOP_NEXTALT:
/* first alternative failed: run the second one (data.child2) */
if (result == NULL) {
currentContinuation.node = t;
currentContinuation.op = REOP_NEXTALT;
t = (RENode *)(t->data.child2);
ASSERT(t);
op = t->kind;
continue;
}
else {
result = x;
currentContinuation = t->continuation;
break;
}
/* the child will eventually terminate, so provide a capturing state
 * as the continuation
 */
case REOP_PAREN:
t->continuation = currentContinuation;
currentContinuation.op = REOP_CLOSEPAREN;
currentContinuation.node = t;
/* the capture starts at the current position with zero length */
x->parens[t->parenIndex].index = (REint32)(x->endIndex);
x->parens[t->parenIndex].length = 0;
t = (RENode *)(t->child);
ASSERT(t);
op = t->kind;
continue;
case REOP_CLOSEPAREN:
/* capture length = current position minus the recorded start */
x->parens[t->parenIndex].length = x->endIndex
- x->parens[t->parenIndex].index;
currentContinuation = t->continuation;
break;
case REOP_QUANT:
t->continuation = currentContinuation;
t->count = 0;
t->index = x->endIndex;
if (t->data.quantifier.greedy) {
/*
 * Save the current zero-count state, then jump to the child.
 */
backTrackData = pushBackTrack(globalData, REOP_REPEAT, t, x);
if (!backTrackData) return NULL;
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
++nodeStateStackTop;
currentContinuation.node = t;
currentContinuation.op = REOP_REPEAT;
t = (RENode *)(t->child);
op = t->kind;
continue;
}
else {
/*
 * Non-greedy, only run the child if the minimum
 * requirement hasn't been met
 */
if (t->count < t->data.quantifier.min) {
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
++nodeStateStackTop;
currentContinuation.node = t;
currentContinuation.op = REOP_MINIMALREPEAT;
t = (RENode *)(t->child);
op = t->kind;
continue;
}
else {
backTrackData = pushBackTrack(globalData,
REOP_MINIMALREPEAT, t, x);
if (!backTrackData) return NULL;
result = x;
break;
}
}
case REOP_REPEAT:
if (result == NULL) {
/*
 * There's been a failure, see if we have enough children
 */
currentContinuation = t->continuation;
if (t->count >= t->data.quantifier.min)
result = x;
break;
}
else {
/*
 * Pop us off the stack
 */
--nodeStateStackTop;
ASSERT(nodeStateStack[nodeStateStackTop].node == t);
if ((t->count >= t->data.quantifier.min)
&& (x->endIndex == t->index)) {
/* matched an empty string, that'll get us nowhere */
result = NULL;
currentContinuation = t->continuation;
break;
}
++t->count;
backTrackData = pushBackTrack(globalData, REOP_REPEAT, t, x);
if (!backTrackData) return NULL;
if (t->count == t->data.quantifier.max) {
currentContinuation = t->continuation;
result = NULL;
break;
}
else {
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
++nodeStateStackTop;
/* mark the captures belonging to this quantifier as unset before the next iteration */
for (k = 0; k <= t->data.quantifier.parenCount; k++)
x->parens[t->parenIndex + k].index = -1;
t->index = x->endIndex;
currentContinuation.node = t;
currentContinuation.op = REOP_REPEAT;
t = (RENode *)(t->child);
op = t->kind;
}
}
continue;
case REOP_MINIMALREPEAT:
if (result == NULL) {
/*
 * Non-greedy failure - try to consume another child
 */
if ((t->data.quantifier.max == -1)
|| (t->count < t->data.quantifier.max)) {
for (k = 0; k <= t->data.quantifier.parenCount; k++)
x->parens[t->parenIndex + k].index = -1;
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
++nodeStateStackTop;
currentContinuation.node = t;
currentContinuation.op = REOP_MINIMALREPEAT;
t = (RENode *)(t->child);
op = t->kind;
continue;
}
else
break;
}
else {
--nodeStateStackTop;
ASSERT(nodeStateStack[nodeStateStackTop].node == t);
if ((t->count >= t->data.quantifier.min)
&& (x->endIndex == t->index)) {
/* matched an empty string, that'll get us nowhere */
result = NULL;
currentContinuation = t->continuation;
break;
}
++t->count;
if (t->count < t->data.quantifier.min) {
for (k = 0; k <= t->data.quantifier.parenCount; k++)
x->parens[t->parenIndex + k].index = -1;
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
++nodeStateStackTop;
currentContinuation.node = t;
currentContinuation.op = REOP_MINIMALREPEAT;
t->index = x->endIndex;
t = (RENode *)(t->child);
op = t->kind;
continue;
}
else {
backTrackData = pushBackTrack(globalData,
REOP_MINIMALREPEAT, t, x);
if (!backTrackData) return NULL;
currentContinuation = t->continuation;
break;
}
}
case REOP_BACKREF:
result = backrefMatcher(globalData, x, t);
break;
/* supersede the continuation with an assertion tester */
case REOP_ASSERT:
t->continuation = currentContinuation;
currentContinuation.node = t;
currentContinuation.op = REOP_ASSERTTEST;
/* remember the position and backtrack depth so REOP_ASSERTTEST can restore them */
t->index = x->endIndex;
t->count = backTrackStackTop;
t = (RENode *)(t->child);
ASSERT(t);
op = t->kind;
continue;
/* also provide the assertion tester as the backtrack state */
case REOP_ASSERTNOT:
t->continuation = currentContinuation;
currentContinuation.node = t;
currentContinuation.op = REOP_ASSERTTEST;
t->index = x->endIndex;
t->count = backTrackStackTop;
backTrackData = pushBackTrack(globalData, REOP_ASSERTTEST, t, x);
if (!backTrackData) return NULL;
t = (RENode *)(t->child);
ASSERT(t);
op = t->kind;
continue;
case REOP_ASSERTTEST:
/* restore the backtrack depth and position saved when the assertion was entered */
backTrackStackTop = t->count;
x->endIndex = t->index;
if (t->kind == REOP_ASSERT) {
if (result != NULL) {
result = x;
}
}
else {
/* negative assertion: the outcome of the child is inverted */
if (result == NULL)
result = x;
else {
result = NULL;
}
}
currentContinuation = t->continuation;
break;
case REOP_CLASS:
result = classMatcher(globalData, x, t);
if (globalData->error != NO_ERROR) return NULL;
break;
case REOP_END:
if (x != NULL)
return x;
break;
}
/*
 * If the match failed and there's a backtrack option, take it.
 * Otherwise this is a match failure.
 */
if (result == NULL) {
if (backTrackStackTop > 0) {
backTrackStackTop--;
backTrackData = &backTrackStack[backTrackStackTop];
recoverState(x, backTrackData->state);
free(backTrackData->state);
/* restore count/index on every saved node and rebuild the node-state stack */
for (k = 0; k < backTrackData->precedingNodeStateTop; k++) {
RENode *n = backTrackData->precedingNodeState[k].node;
n->count = backTrackData->precedingNodeState[k].count;
n->index = backTrackData->precedingNodeState[k].index;
nodeStateStack[k] = backTrackData->precedingNodeState[k];
}
nodeStateStackTop = backTrackData->precedingNodeStateTop;
if (backTrackData->precedingNodeState)
free(backTrackData->precedingNodeState);
/* resume at the node and opcode recorded in the backtrack entry */
t = backTrackData->continuation.node;
t->count = backTrackData->nodeState.count;
t->index = backTrackData->nodeState.index;
op = backTrackData->continuation.op;
continue;
}
else
return NULL;
}
else
x = result;
/*
 * Continue with the expression. If there is no next link, use
 * the current continuation.
 */
t = t->next;
if (t)
op = t->kind;
else {
t = currentContinuation.node;
ASSERT(t);
op = currentContinuation.op;
currentContinuation.op = t->continuation.op;
currentContinuation.node = t->continuation.node;
}
}
return NULL;
}
#endif
/*
* Throw away the RegExp and all data associated with it.
*/
@ -1533,9 +2015,10 @@ static REState *executeRENode(RENode *t, REGlobalData *globalData, REState *x)
REbool foundAnchor = false;
if (t->child)
matchCh = *((REchar *)t->child);
for (k = x->endIndex; k < x->length; k++) {
for (k = x->endIndex; k < globalData->length; k++) {
if (globalData->input[k] == matchCh) {
x->length = k;
x->endIndex = k;
x->startIndex = k; /* inform caller that we bumped along */
foundAnchor = true;
break;
}
@ -1601,7 +2084,13 @@ static REState *executeRENode(RENode *t, REGlobalData *globalData, REState *x)
* as a back track opportunity
*/
case REOP_ALT:
t->continuation = currentContinuation;
nodeStateStack[nodeStateStackTop].node = t;
nodeStateStack[nodeStateStackTop].count = t->count;
nodeStateStack[nodeStateStackTop].index = x->endIndex;
nodeStateStack[nodeStateStackTop].continuation = currentContinuation;
++nodeStateStackTop;
// t->continuation = currentContinuation;
currentContinuation.node = t;
currentContinuation.op = REOP_NEXTALT;
if (!pushBackTrack(globalData, REOP_NEXTALT, t, x)) return NULL;
@ -1619,8 +2108,9 @@ static REState *executeRENode(RENode *t, REGlobalData *globalData, REState *x)
continue;
}
else {
--nodeStateStackTop;
result = x;
currentContinuation = t->continuation;
currentContinuation = nodeStateStack[nodeStateStackTop].continuation;//t->continuation;
break;
}
@ -1994,6 +2484,7 @@ static REState *initMatch(REGlobalData *gData, REParseState *parseState,
result->n = parseState->parenCount;
for (j = 0; j < result->n; j++)
result->parens[j].index = -1;
result->startIndex = 0;
result->endIndex = 0;
gData->flags = parseState->flags;
@ -2038,49 +2529,40 @@ REState *REExecute(REParseState *parseState, const REchar *text,
REGlobalData gData;
REint32 i;
REint32 j;
REState *x = initMatch(&gData, parseState, text, length);
if (!x)
return NULL;
if (parseState->flags & GLOBAL) {
i = parseState->lastIndex;
if ((i < 0) || (i > (REint32)length)) {
x->startIndex = parseState->lastIndex;
if ((x->startIndex < 0) || (x->startIndex > length)) {
parseState->lastIndex = 0;
free(x);
return NULL;
}
}
else
i = 0;
if (!initMatch(&gData, parseState, text, length)) {
free(x);
return NULL;
x->endIndex = x->startIndex;
}
while (true) {
x->endIndex = (REuint32)i;
result = executeRENode(parseState->result, &gData, x);
for (j = 0; j < backTrackStackTop; j++)
free(backTrackStack[j].state);
for (i = 0; i < backTrackStackTop; i++)
free(backTrackStack[i].state);
backTrackStackTop = 0;
nodeStateStackTop = 0;
if (gData.error != NO_ERROR) return NULL;
if (result == NULL) {
i++;
if (i > length) {
x->startIndex++;
if (x->startIndex > length) {
parseState->lastIndex = 0;
free(x);
return NULL;
}
x->endIndex = x->startIndex;
}
else {
if (parseState->flags & GLOBAL)
parseState->lastIndex = (REint32)(result->endIndex);
result->length = result->endIndex - i;
result->endIndex = (REuint32)(i);
parseState->lastIndex = result->endIndex;
break;
}
}

View File

@ -88,8 +88,8 @@ typedef struct RECapture {
} RECapture;
/*
 * Match state handed back to callers. Callers in this change take the
 * matched text as the input range [startIndex, endIndex).
 * NOTE(review): 'length' is no longer read by the updated callers, which
 * compute endIndex - startIndex instead -- confirm whether the field is
 * still meant to exist.
 */
typedef struct REState {
REint32 startIndex; /* index in the input where the match begins */
REint32 endIndex; /* current position while matching; on success, the index just past the match */
REint32 length; /* the length of a successful match */
REint32 n; /* set to (n - 1), i.e. for /((a)b)/, this field is 1 */
RECapture parens[1]; /* first of 'n' captures, allocated at end of this struct */
} REState;