mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-11 08:13:35 +00:00
3312 lines
84 KiB
C
3312 lines
84 KiB
C
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
|
|
/*
|
|
* JS regular expressions, after Perl.
|
|
*/
|
|
#include "jsstddef.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "prtypes.h"
|
|
#include "prarena.h"
|
|
#include "prassert.h"
|
|
#include "jsapi.h"
|
|
#include "jsarray.h"
|
|
#include "jsatom.h"
|
|
#include "jscntxt.h"
|
|
#include "jsconfig.h"
|
|
#include "jsfun.h"
|
|
#include "jsgc.h"
|
|
#include "jsinterp.h"
|
|
#include "jslock.h"
|
|
#include "jsnum.h"
|
|
#include "jsobj.h"
|
|
#include "jsopcode.h"
|
|
#include "jsregexp.h"
|
|
#include "jsstr.h"
|
|
|
|
#if JS_HAS_REGEXPS
|
|
|
|
typedef struct RENode RENode;

/*
 * Regular expression opcodes.  The numeric values are used as indexes into
 * the reopsize[] table (and, in DEBUG builds, the reopname[] table), so
 * the order here must stay in step with those tables.
 */
typedef enum REOp {
    REOP_EMPTY      = 0,    /* match rest of input against rest of r.e. */
    REOP_ALT        = 1,    /* alternative subexpressions in kid and next */
    REOP_BOL        = 2,    /* beginning of input (or line if multiline) */
    REOP_EOL        = 3,    /* end of input (or line if multiline) */
    REOP_WBDRY      = 4,    /* match "" at word boundary */
    REOP_WNONBDRY   = 5,    /* match "" at word non-boundary */
    REOP_QUANT      = 6,    /* quantified atom: atom{1,2} */
    REOP_STAR       = 7,    /* zero or more occurrences of kid */
    REOP_PLUS       = 8,    /* one or more occurrences of kid */
    REOP_OPT        = 9,    /* optional subexpression in kid */
    REOP_LPAREN     = 10,   /* left paren bytecode: kid is u.num'th sub-regexp */
    REOP_RPAREN     = 11,   /* right paren bytecode */
    REOP_DOT        = 12,   /* stands for any character */
    REOP_CCLASS     = 13,   /* character class: [a-f] */
    REOP_DIGIT      = 14,   /* match a digit char: [0-9] */
    REOP_NONDIGIT   = 15,   /* match a non-digit char: [^0-9] */
    REOP_ALNUM      = 16,   /* match an alphanumeric char: [0-9a-z_A-Z] */
    REOP_NONALNUM   = 17,   /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
    REOP_SPACE      = 18,   /* match a whitespace char */
    REOP_NONSPACE   = 19,   /* match a non-whitespace char */
    REOP_BACKREF    = 20,   /* back-reference (e.g., \1) to a parenthetical */
    REOP_FLAT       = 21,   /* match a flat string */
    REOP_FLAT1      = 22,   /* match a single char */
    REOP_JUMP       = 23,   /* for deoptimized closure loops */
    REOP_DOTSTAR    = 24,   /* optimize .* to use a single opcode */
    REOP_ANCHOR     = 25,   /* like .* but skips left context to unanchored r.e. */
    REOP_EOLONLY    = 26,   /* $ not preceded by any pattern */
    REOP_UCFLAT     = 27,   /* flat Unicode string; len immediate counts chars */
    REOP_UCFLAT1    = 28,   /* single Unicode char */
    REOP_UCCLASS    = 29,   /* Unicode character class, vector of chars to match */
    REOP_NUCCLASS   = 30,   /* negated Unicode character class */
    REOP_BACKREFi   = 31,   /* case-independent REOP_BACKREF */
    REOP_FLATi      = 32,   /* case-independent REOP_FLAT */
    REOP_FLAT1i     = 33,   /* case-independent REOP_FLAT1 */
    REOP_UCFLATi    = 34,   /* case-independent REOP_UCFLAT */
    REOP_UCFLAT1i   = 35,   /* case-independent REOP_UCFLAT1 */
    REOP_ANCHOR1    = 36,   /* first-char discriminating REOP_ANCHOR */
    REOP_NCCLASS    = 37,   /* negated 8-bit character class */
    REOP_END                /* one past the last real opcode */
} REOp;
|
|
|
|
#define CCLASS_CHARSET_SIZE 256 /* ISO-Latin-1 */
|
|
|
|
uint8 reopsize[] = {
|
|
/* EMPTY */ 1,
|
|
/* ALT */ 3,
|
|
/* BOL */ 1,
|
|
/* EOL */ 1,
|
|
/* WBDRY */ 1,
|
|
/* WNONBDRY */ 1,
|
|
/* QUANT */ 7,
|
|
/* STAR */ 1,
|
|
/* PLUS */ 1,
|
|
/* OPT */ 1,
|
|
/* LPAREN */ 3,
|
|
/* RPAREN */ 3,
|
|
/* DOT */ 1,
|
|
/* CCLASS */ 1 + (CCLASS_CHARSET_SIZE / PR_BITS_PER_BYTE),
|
|
/* DIGIT */ 1,
|
|
/* NONDIGIT */ 1,
|
|
/* ALNUM */ 1,
|
|
/* NONALNUM */ 1,
|
|
/* SPACE */ 1,
|
|
/* NONSPACE */ 1,
|
|
/* BACKREF */ 2,
|
|
/* FLAT */ 2, /* (2 = op + len) + [len bytes] */
|
|
/* FLAT1 */ 2,
|
|
/* JUMP */ 3,
|
|
/* DOTSTAR */ 1,
|
|
/* ANCHOR */ 1,
|
|
/* EOLONLY */ 1,
|
|
/* UCFLAT */ 2, /* (2 = op + len) + [len 2-byte chars] */
|
|
/* UCFLAT1 */ 3, /* op + hibyte + lobyte */
|
|
/* UCCLASS */ 3, /* (3 = op + 2-byte len) + [len bytes] */
|
|
/* NUCCLASS */ 3, /* (3 = op + 2-byte len) + [len bytes] */
|
|
/* BACKREFi */ 2,
|
|
/* FLATi */ 2, /* (2 = op + len) + [len bytes] */
|
|
/* FLAT1i */ 2,
|
|
/* UCFLATi */ 2, /* (2 = op + len) + [len 2-byte chars] */
|
|
/* UCFLAT1i */ 3, /* op + hibyte + lobyte */
|
|
/* ANCHOR1 */ 1,
|
|
/* NCCLASS */ 1 + (CCLASS_CHARSET_SIZE / PR_BITS_PER_BYTE),
|
|
/* END */ 0,
|
|
};
|
|
|
|
#define REOP_FLATLEN_MAX 255 /* maximum length of FLAT string */
|
|
|
|
struct RENode {
|
|
uint8 op; /* packed r.e. op bytecode */
|
|
uint8 flags; /* flags, see below */
|
|
uint16 offset; /* bytecode offset */
|
|
RENode *next; /* next in concatenation order */
|
|
void *kid; /* first operand */
|
|
union {
|
|
void *kid2; /* second operand */
|
|
jsint num; /* could be a number */
|
|
jschar chr; /* or a character */
|
|
struct { /* or a quantifier range */
|
|
uint16 min;
|
|
uint16 max;
|
|
} range;
|
|
struct { /* or a Unicode character class */
|
|
uint16 kidlen; /* length of string at kid, in jschars */
|
|
uint16 bmsize; /* bitmap size, based on max char code */
|
|
} ucclass;
|
|
} u;
|
|
};
|
|
|
|
/* Fetch a node's opcode as an REOp (it is stored packed in a uint8). */
#define REOP(ren)               ((REOp)(ren)->op)

/* Bits for RENode.flags. */
#define RENODE_ANCHORED         (1 << 0)    /* anchored at the front */
#define RENODE_SINGLE           (1 << 1)    /* matches a single char */
#define RENODE_NONEMPTY         (1 << 2)    /* does not match empty string */
#define RENODE_ISNEXT           (1 << 3)    /* ren is next after at least one node */
#define RENODE_GOODNEXT         (1 << 4)    /* ren->next is a tree-like edge in the graph */
#define RENODE_ISJOIN           (1 << 5)    /* ren is a join point in the graph */
#define RENODE_REALLOK          (1 << 6)    /* REOP_FLAT owns tempPool space to realloc */
|
|
|
|
typedef struct CompilerState {
|
|
JSContext *context;
|
|
const jschar *cpbegin;
|
|
const jschar *cp;
|
|
uintN flags;
|
|
uintN parenCount;
|
|
size_t progLength;
|
|
} CompilerState;
|
|
|
|
static RENode *
|
|
NewRENode(CompilerState *state, REOp op, void *kid)
|
|
{
|
|
JSContext *cx;
|
|
RENode *ren;
|
|
|
|
cx = state->context;
|
|
PR_ARENA_ALLOCATE(ren, &cx->tempPool, sizeof *ren);
|
|
if (!ren) {
|
|
JS_ReportOutOfMemory(cx);
|
|
return NULL;
|
|
}
|
|
ren->op = (uint8)op;
|
|
ren->flags = 0;
|
|
ren->offset = 0;
|
|
ren->next = NULL;
|
|
ren->kid = kid;
|
|
return ren;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
|
|
#include <stdio.h>
|
|
|
|
/* Printable opcode names for DumpRegExp, indexed by REOp value. */
static char *reopname[] = {
    "empty",    "alt",      "bol",      "eol",
    "wbdry",    "wnonbdry", "quant",    "star",
    "plus",     "opt",      "lparen",   "rparen",
    "dot",      "cclass",   "digit",    "nondigit",
    "alnum",    "nonalnum", "space",    "nonspace",
    "backref",  "flat",     "flat1",    "jump",
    "dotstar",  "anchor",   "eolonly",  "ucflat",
    "ucflat1",  "ucclass",  "nucclass", "backrefi",
    "flati",    "flat1i",   "ucflati",  "ucflat1i",
    "anchor1",  "ncclass",  "end"
};
|
|
|
|
static void
|
|
PrintChar(jschar c)
|
|
{
|
|
if (c >> 8)
|
|
printf("\\u%04X", c);
|
|
else
|
|
#if !defined XP_PC || !defined _MSC_VER || _MSC_VER > 800
|
|
putchar((char)c);
|
|
#else
|
|
/* XXX is there a better way with MSVC1.52? */
|
|
printf("%c", c);
|
|
#endif
|
|
}
|
|
|
|
static JSBool
|
|
DumpRegExp(JSContext *cx, RENode *ren)
|
|
{
|
|
static int level;
|
|
JSBool ok;
|
|
int i, len;
|
|
jschar *cp;
|
|
char *cstr;
|
|
|
|
if (level == 0)
|
|
printf("level offset flags description\n");
|
|
level++;
|
|
ok = JS_TRUE;
|
|
do {
|
|
printf("%5d %6d %c%c%c%c%c%c %s",
|
|
level,
|
|
(int)ren->offset,
|
|
(ren->flags & RENODE_ANCHORED) ? 'A' : '-',
|
|
(ren->flags & RENODE_SINGLE) ? 'S' : '-',
|
|
(ren->flags & RENODE_NONEMPTY) ? 'F' : '-', /* F for full */
|
|
(ren->flags & RENODE_ISNEXT) ? 'N' : '-', /* N for next */
|
|
(ren->flags & RENODE_GOODNEXT) ? 'G' : '-',
|
|
(ren->flags & RENODE_ISJOIN) ? 'J' : '-',
|
|
reopname[ren->op]);
|
|
|
|
switch (REOP(ren)) {
|
|
case REOP_ALT:
|
|
printf(" %d\n", ren->next->offset);
|
|
ok = DumpRegExp(cx, ren->kid);
|
|
if (!ok)
|
|
goto out;
|
|
break;
|
|
|
|
case REOP_STAR:
|
|
case REOP_PLUS:
|
|
case REOP_OPT:
|
|
case REOP_ANCHOR1:
|
|
printf("\n");
|
|
ok = DumpRegExp(cx, ren->kid);
|
|
if (!ok)
|
|
goto out;
|
|
break;
|
|
|
|
case REOP_QUANT:
|
|
printf(" next %d min %d max %d\n",
|
|
ren->next->offset, ren->u.range.min, ren->u.range.max);
|
|
ok = DumpRegExp(cx, ren->kid);
|
|
if (!ok)
|
|
goto out;
|
|
break;
|
|
|
|
case REOP_LPAREN:
|
|
printf(" num %d\n", (int)ren->u.num);
|
|
ok = DumpRegExp(cx, ren->kid);
|
|
if (!ok)
|
|
goto out;
|
|
break;
|
|
|
|
case REOP_RPAREN:
|
|
printf(" num %d\n", (int)ren->u.num);
|
|
break;
|
|
|
|
case REOP_CCLASS:
|
|
len = (jschar *)ren->u.kid2 - (jschar *)ren->kid;
|
|
cstr = js_DeflateString(cx, (jschar *)ren->kid, len);
|
|
if (!cstr) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
printf(" [%s]\n", cstr);
|
|
JS_free(cx, cstr);
|
|
break;
|
|
|
|
case REOP_BACKREF:
|
|
printf(" num %d\n", (int)ren->u.num);
|
|
break;
|
|
|
|
case REOP_FLAT:
|
|
len = (jschar *)ren->u.kid2 - (jschar *)ren->kid;
|
|
cstr = js_DeflateString(cx, (jschar *)ren->kid, len);
|
|
if (!cstr) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
printf(" %s (%d)\n", cstr, len);
|
|
JS_free(cx, cstr);
|
|
break;
|
|
|
|
case REOP_FLAT1:
|
|
printf(" %c ('\\%o')\n", (char)ren->u.chr, ren->u.chr);
|
|
break;
|
|
|
|
case REOP_JUMP:
|
|
printf(" %d\n", ren->next->offset);
|
|
break;
|
|
|
|
case REOP_UCFLAT:
|
|
cp = ren->kid;
|
|
len = (jschar *)ren->u.kid2 - cp;
|
|
for (i = 0; i < len; i++)
|
|
PrintChar(cp[i]);
|
|
break;
|
|
|
|
case REOP_UCFLAT1:
|
|
PrintChar(ren->u.chr);
|
|
break;
|
|
|
|
case REOP_UCCLASS:
|
|
cp = ren->kid;
|
|
len = (jschar *)ren->u.kid2 - cp;
|
|
printf(" [");
|
|
for (i = 0; i < len; i++)
|
|
PrintChar(cp[i]);
|
|
printf("]\n");
|
|
break;
|
|
|
|
default:
|
|
printf("\n");
|
|
break;
|
|
}
|
|
|
|
if (!(ren->flags & RENODE_GOODNEXT))
|
|
break;
|
|
} while ((ren = ren->next) != NULL);
|
|
out:
|
|
level--;
|
|
return ok;
|
|
}
|
|
|
|
#endif /* DEBUG */
|
|
|
|
static JSBool
|
|
FixNext(CompilerState *state, RENode *ren1, RENode *ren2, RENode *oldnext)
|
|
{
|
|
JSBool goodnext;
|
|
RENode *next, *kid, *ren;
|
|
|
|
goodnext = ren2 && !(ren2->flags & RENODE_ISNEXT);
|
|
|
|
/*
|
|
* Find the final node in a list of alternatives, or concatenations, or
|
|
* even a concatenation of alternatives followed by non-alternatives (e.g.
|
|
* ((x|y)z)w where ((x|y)z) is ren1 and w is ren2).
|
|
*/
|
|
for (; (next = ren1->next) != NULL && next != oldnext; ren1 = next) {
|
|
if (REOP(ren1) == REOP_ALT) {
|
|
/* Find the end of this alternative's operand list. */
|
|
kid = ren1->kid;
|
|
if (REOP(kid) == REOP_JUMP)
|
|
continue;
|
|
for (ren = kid; ren->next; ren = ren->next)
|
|
PR_ASSERT(REOP(ren) != REOP_ALT);
|
|
|
|
/* Append a jump node to all but the last alternative. */
|
|
ren->next = NewRENode(state, REOP_JUMP, NULL);
|
|
if (!ren->next)
|
|
return JS_FALSE;
|
|
ren->next->flags |= RENODE_ISNEXT;
|
|
ren->flags |= RENODE_GOODNEXT;
|
|
|
|
/* Recur to fix all descendent nested alternatives. */
|
|
if (!FixNext(state, kid, ren2, oldnext))
|
|
return JS_FALSE;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now ren1 points to the last alternative, or to the final node on a
|
|
* concatenation list. Set its next link to ren2, flagging a join point
|
|
* if appropriate.
|
|
*/
|
|
if (ren2) {
|
|
if (!(ren2->flags & RENODE_ISNEXT))
|
|
ren2->flags |= RENODE_ISNEXT;
|
|
else
|
|
ren2->flags |= RENODE_ISJOIN;
|
|
}
|
|
ren1->next = ren2;
|
|
if (goodnext)
|
|
ren1->flags |= RENODE_GOODNEXT;
|
|
|
|
/*
|
|
* The following ops have a kid subtree through which to recur. Here is
|
|
* where we fix the next links under the final ALT node's kid.
|
|
*/
|
|
switch (REOP(ren1)) {
|
|
case REOP_ALT:
|
|
case REOP_QUANT:
|
|
case REOP_STAR:
|
|
case REOP_PLUS:
|
|
case REOP_OPT:
|
|
case REOP_LPAREN:
|
|
if (!FixNext(state, ren1->kid, ren2, oldnext))
|
|
return JS_FALSE;
|
|
break;
|
|
default:;
|
|
}
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSBool
|
|
SetNext(CompilerState *state, RENode *ren1, RENode *ren2)
|
|
{
|
|
return FixNext(state, ren1, ren2, NULL);
|
|
}
|
|
|
|
/*
|
|
* Parser forward declarations.
|
|
*/
|
|
typedef RENode *REParser(CompilerState *state);
|
|
|
|
static REParser ParseRegExp;
|
|
static REParser ParseAltern;
|
|
static REParser ParseItem;
|
|
static REParser ParseQuantAtom;
|
|
static REParser ParseAtom;
|
|
|
|
/*
|
|
* Top-down regular expression grammar, based closely on Perl4.
|
|
*
|
|
* regexp: altern A regular expression is one or more
|
|
* altern '|' regexp alternatives separated by vertical bar.
|
|
*/
|
|
static RENode *
|
|
ParseRegExp(CompilerState *state)
|
|
{
|
|
RENode *ren, *kid, *ren1, *ren2;
|
|
const jschar *cp;
|
|
|
|
ren = ParseAltern(state);
|
|
if (!ren)
|
|
return NULL;
|
|
cp = state->cp;
|
|
if (*cp == '|') {
|
|
kid = ren;
|
|
ren = NewRENode(state, REOP_ALT, kid);
|
|
if (!ren)
|
|
return NULL;
|
|
ren->flags = kid->flags & (RENODE_ANCHORED | RENODE_NONEMPTY);
|
|
ren1 = ren;
|
|
do {
|
|
/* (balance: */
|
|
state->cp = ++cp;
|
|
if (*cp == '|' || *cp == ')') {
|
|
kid = NewRENode(state, REOP_EMPTY, NULL);
|
|
} else {
|
|
kid = ParseAltern(state);
|
|
cp = state->cp;
|
|
}
|
|
if (!kid)
|
|
return NULL;
|
|
ren2 = NewRENode(state, REOP_ALT, kid);
|
|
if (!ren2)
|
|
return NULL;
|
|
ren1->next = ren2;
|
|
ren1->flags |= RENODE_GOODNEXT;
|
|
ren2->flags = (kid->flags & (RENODE_ANCHORED | RENODE_NONEMPTY))
|
|
| RENODE_ISNEXT;
|
|
ren1 = ren2;
|
|
} while (*cp == '|');
|
|
}
|
|
return ren;
|
|
}
|
|
|
|
/*
|
|
* altern: item An alternative is one or more items,
|
|
* item altern concatenated together.
|
|
*/
|
|
static RENode *
|
|
ParseAltern(CompilerState *state)
|
|
{
|
|
RENode *ren, *ren1, *ren2;
|
|
uintN flags;
|
|
const jschar *cp;
|
|
jschar c;
|
|
|
|
ren = ren1 = ParseItem(state);
|
|
if (!ren)
|
|
return NULL;
|
|
flags = 0;
|
|
cp = state->cp;
|
|
/* (balance: */
|
|
while ((c = *cp) != 0 && c != '|' && c != ')') {
|
|
ren2 = ParseItem(state);
|
|
if (!ren2)
|
|
return NULL;
|
|
if (!SetNext(state, ren1, ren2))
|
|
return NULL;
|
|
flags |= ren2->flags;
|
|
ren1 = ren2;
|
|
cp = state->cp;
|
|
}
|
|
|
|
/*
|
|
* Propagate NONEMPTY to the front of a concatenation list, so that the
|
|
* first alternative in (^a|b) is considered non-empty. The first node
|
|
* in a list may match the empty string (as ^ does), but if the list is
|
|
* non-empty, then the first node's NONEMPTY flag must be set.
|
|
*/
|
|
ren->flags |= flags & RENODE_NONEMPTY;
|
|
return ren;
|
|
}
|
|
|
|
/*
|
|
* item: assertion An item is either an assertion or
|
|
* quantatom a quantified atom.
|
|
*
|
|
* assertion: '^' Assertions match beginning of string
|
|
* (or line if the class static property
|
|
* RegExp.multiline is true).
|
|
* '$' End of string (or line if the class
|
|
* static property RegExp.multiline is
|
|
* true).
|
|
* '\b' Word boundary (between \w and \W).
|
|
* '\B' Word non-boundary.
|
|
*/
|
|
static RENode *
|
|
ParseItem(CompilerState *state)
|
|
{
|
|
const jschar *cp;
|
|
RENode *ren;
|
|
REOp op;
|
|
|
|
cp = state->cp;
|
|
switch (*cp) {
|
|
case '^':
|
|
state->cp = cp + 1;
|
|
ren = NewRENode(state, REOP_BOL, NULL);
|
|
if (ren)
|
|
ren->flags |= RENODE_ANCHORED;
|
|
return ren;
|
|
|
|
case '$':
|
|
state->cp = cp + 1;
|
|
return NewRENode(state,
|
|
(cp == state->cpbegin ||
|
|
((cp[-1] == '(' || cp[-1] == '|') && /*balance)*/
|
|
(cp - 1 == state->cpbegin || cp[-2] != '\\')))
|
|
? REOP_EOLONLY
|
|
: REOP_EOL,
|
|
NULL);
|
|
|
|
case '\\':
|
|
switch (*++cp) {
|
|
case 'b':
|
|
op = REOP_WBDRY;
|
|
break;
|
|
case 'B':
|
|
op = REOP_WNONBDRY;
|
|
break;
|
|
default:
|
|
return ParseQuantAtom(state);
|
|
}
|
|
|
|
/*
|
|
* Word boundaries and non-boundaries are flagged as non-empty so they
|
|
* will be prefixed by an anchoring node.
|
|
*/
|
|
state->cp = cp + 1;
|
|
ren = NewRENode(state, op, NULL);
|
|
if (ren)
|
|
ren->flags |= RENODE_NONEMPTY;
|
|
return ren;
|
|
|
|
default:;
|
|
}
|
|
return ParseQuantAtom(state);
|
|
}
|
|
|
|
/*
|
|
* quantatom: atom An unquantified atom.
|
|
* quantatom '{' n ',' m '}'
|
|
* Atom must occur between n and m times.
|
|
* quantatom '{' n ',' '}' Atom must occur at least n times.
|
|
* quantatom '{' n '}' Atom must occur exactly n times.
|
|
* quantatom '*' Zero or more times (same as {0,}).
|
|
* quantatom '+' One or more times (same as {1,}).
|
|
* quantatom '?' Zero or one time (same as {0,1}).
|
|
*/
|
|
static RENode *
|
|
ParseQuantAtom(CompilerState *state)
|
|
{
|
|
RENode *ren, *ren2;
|
|
const jschar *cp, *up;
|
|
jschar c;
|
|
uint32 min, max;
|
|
|
|
ren = ParseAtom(state);
|
|
if (!ren)
|
|
return NULL;
|
|
|
|
cp = state->cp;
|
|
loop:
|
|
switch (*cp) {
|
|
case '{':
|
|
c = *++cp;
|
|
if (!JS7_ISDEC(c)) {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_BAD_QUANTIFIER, state->cp);
|
|
return NULL;
|
|
}
|
|
min = (uint32)JS7_UNDEC(c);
|
|
for (c = *++cp; JS7_ISDEC(c); c = *++cp) {
|
|
min = 10 * min + (uint32)JS7_UNDEC(c);
|
|
if (min >> 16) {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_MIN_TOO_BIG, state->cp);
|
|
return NULL;
|
|
}
|
|
}
|
|
if (*cp == ',') {
|
|
up = ++cp;
|
|
if (JS7_ISDEC(*cp)) {
|
|
max = (uint32)JS7_UNDEC(*cp);
|
|
for (c = *++cp; JS7_ISDEC(c); c = *++cp) {
|
|
max = 10 * max + (uint32)JS7_UNDEC(c);
|
|
if (max >> 16) {
|
|
JS_ReportErrorNumber(state->context,
|
|
js_GetErrorMessage, NULL,
|
|
JSMSG_MAX_TOO_BIG, up);
|
|
return NULL;
|
|
}
|
|
}
|
|
if (max == 0)
|
|
goto zero_quant;
|
|
if (min > max) {
|
|
JS_ReportErrorNumber(state->context,
|
|
js_GetErrorMessage, NULL,
|
|
JSMSG_OUT_OF_ORDER, up);
|
|
return NULL;
|
|
}
|
|
} else {
|
|
/* 0 means no upper bound. */
|
|
max = 0;
|
|
}
|
|
} else {
|
|
/* Exactly n times. */
|
|
if (min == 0) {
|
|
zero_quant:
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_ZERO_QUANTIFIER, state->cp);
|
|
return NULL;
|
|
}
|
|
max = min;
|
|
}
|
|
if (*cp != '}') {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_UNTERM_QUANTIFIER, state->cp);
|
|
return NULL;
|
|
}
|
|
cp++;
|
|
|
|
ren2 = NewRENode(state, REOP_QUANT, ren);
|
|
if (!ren2)
|
|
return NULL;
|
|
if (min > 0 && (ren->flags & RENODE_NONEMPTY))
|
|
ren2->flags |= RENODE_NONEMPTY;
|
|
ren2->u.range.min = (uint16)min;
|
|
ren2->u.range.max = (uint16)max;
|
|
ren = ren2;
|
|
goto loop;
|
|
|
|
case '*':
|
|
if (!(ren->flags & RENODE_NONEMPTY)) {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_EMPTY_BEFORE_STAR);
|
|
return NULL;
|
|
}
|
|
cp++;
|
|
ren = NewRENode(state, REOP_STAR, ren);
|
|
goto loop;
|
|
|
|
case '+':
|
|
if (!(ren->flags & RENODE_NONEMPTY)) {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_EMPTY_BEFORE_PLUS);
|
|
return NULL;
|
|
}
|
|
cp++;
|
|
ren2 = NewRENode(state, REOP_PLUS, ren);
|
|
if (!ren2)
|
|
return NULL;
|
|
if (ren->flags & RENODE_NONEMPTY)
|
|
ren2->flags |= RENODE_NONEMPTY;
|
|
ren = ren2;
|
|
goto loop;
|
|
|
|
case '?':
|
|
cp++;
|
|
ren = NewRENode(state, REOP_OPT, ren);
|
|
goto loop;
|
|
}
|
|
|
|
state->cp = cp;
|
|
return ren;
|
|
}
|
|
|
|
/*
|
|
* atom: '(' regexp ')' A parenthesized regexp (what matched
|
|
* can be addressed using a backreference,
|
|
* see '\' n below).
|
|
* '.' Matches any char except '\n'.
|
|
* '[' classlist ']' A character class.
|
|
* '[' '^' classlist ']' A negated character class.
|
|
* '\f' Form Feed.
|
|
* '\n' Newline (Line Feed).
|
|
* '\r' Carriage Return.
|
|
* '\t' Horizontal Tab.
|
|
* '\v' Vertical Tab.
|
|
* '\d' A digit (same as [0-9]).
|
|
* '\D' A non-digit.
|
|
* '\w' A word character, [0-9a-z_A-Z].
|
|
* '\W' A non-word character.
|
|
* '\s' A whitespace character, [ \b\f\n\r\t\v].
|
|
* '\S' A non-whitespace character.
|
|
* '\' n A backreference to the nth (n decimal
|
|
* and positive) parenthesized expression.
|
|
* '\' octal An octal escape sequence (octal must be
|
|
* two or three digits long, unless it is
|
|
* 0 for the null character).
|
|
* '\x' hex A hex escape (hex must be two digits).
|
|
* '\c' ctrl A control character, ctrl is a letter.
|
|
* '\' literalatomchar Any character except one of the above
|
|
* that follow '\' in an atom.
|
|
* otheratomchar Any character not first among the other
|
|
* atom right-hand sides.
|
|
*/
|
|
static jschar metachars[] = {
|
|
'|', '^', '$', '{', '*', '+', '?', '(', ')', '.', '[', '\\', '}', 0
|
|
};
|
|
|
|
static jschar closurechars[] = {
|
|
'{', '*', '+', '?', 0 /* balance} */
|
|
};
|
|
|
|
static RENode *
|
|
ParseAtom(CompilerState *state)
|
|
{
|
|
const jschar *cp, *ocp;
|
|
uintN num, tmp, len;
|
|
RENode *ren, *ren2;
|
|
jschar c;
|
|
|
|
cp = ocp = state->cp;
|
|
switch (*cp) {
|
|
case 0:
|
|
ren = NewRENode(state, REOP_EMPTY, NULL);
|
|
break;
|
|
|
|
case '(':
|
|
num = state->parenCount++; /* \1 is numbered 0, etc. */
|
|
state->cp = cp + 1;
|
|
ren2 = ParseRegExp(state);
|
|
if (!ren2)
|
|
return NULL;
|
|
cp = state->cp;
|
|
if (*cp != ')') {
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_MISSING_PAREN, ocp);
|
|
return NULL;
|
|
}
|
|
cp++;
|
|
ren = NewRENode(state, REOP_LPAREN, ren2);
|
|
if (!ren)
|
|
return NULL;
|
|
ren->flags = ren2->flags & (RENODE_ANCHORED | RENODE_NONEMPTY);
|
|
ren->u.num = num;
|
|
ren2 = NewRENode(state, REOP_RPAREN, NULL);
|
|
if (!ren2 || !SetNext(state, ren, ren2))
|
|
return NULL;
|
|
ren2->u.num = num;
|
|
break;
|
|
|
|
case '.':
|
|
cp++;
|
|
if ((c = *cp) == '*')
|
|
cp++;
|
|
ren = NewRENode(state, (c == '*') ? REOP_DOTSTAR : REOP_DOT, NULL);
|
|
if (ren && REOP(ren) == REOP_DOT)
|
|
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
|
|
break;
|
|
|
|
case '[':
|
|
/* A char class must have at least one char in it. */
|
|
if ((c = *++cp) == 0)
|
|
goto bad_cclass;
|
|
|
|
ren = NewRENode(state, REOP_CCLASS, (void *)cp);
|
|
if (!ren)
|
|
return NULL;
|
|
|
|
/* A negated class must have at least one char in it after the ^. */
|
|
if (c == '^' && *++cp == 0)
|
|
goto bad_cclass;
|
|
|
|
while ((c = *++cp) != ']') {
|
|
if (c == 0) {
|
|
bad_cclass:
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_UNTERM_CLASS, ocp);
|
|
return NULL;
|
|
}
|
|
if (c == '\\' && cp[1] != 0)
|
|
cp++;
|
|
}
|
|
ren->u.kid2 = (void *)cp++;
|
|
|
|
/* Since we rule out [] and [^], we can set the non-empty flag. */
|
|
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
|
|
break;
|
|
|
|
case '\\':
|
|
c = *++cp;
|
|
switch (c) {
|
|
case 0:
|
|
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
|
|
JSMSG_TRAILING_SLASH);
|
|
return NULL;
|
|
|
|
case 'f':
|
|
case 'n':
|
|
case 'r':
|
|
case 't':
|
|
case 'v':
|
|
ren = NewRENode(state, REOP_FLAT1, NULL);
|
|
c = js_strchr(js_EscapeMap, c)[-1];
|
|
break;
|
|
|
|
case 'd':
|
|
ren = NewRENode(state, REOP_DIGIT, NULL);
|
|
break;
|
|
case 'D':
|
|
ren = NewRENode(state, REOP_NONDIGIT, NULL);
|
|
break;
|
|
case 'w':
|
|
ren = NewRENode(state, REOP_ALNUM, NULL);
|
|
break;
|
|
case 'W':
|
|
ren = NewRENode(state, REOP_NONALNUM, NULL);
|
|
break;
|
|
case 's':
|
|
ren = NewRENode(state, REOP_SPACE, NULL);
|
|
break;
|
|
case 'S':
|
|
ren = NewRENode(state, REOP_NONSPACE, NULL);
|
|
break;
|
|
|
|
case '0':
|
|
do_octal:
|
|
num = 0;
|
|
while ('0' <= (c = *++cp) && c <= '7') {
|
|
tmp = 8 * num + (uintN)JS7_UNDEC(c);
|
|
if (tmp > 0377)
|
|
break;
|
|
num = tmp;
|
|
}
|
|
cp--;
|
|
ren = NewRENode(state, REOP_FLAT1, NULL);
|
|
c = (jschar)num;
|
|
break;
|
|
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
num = (uintN)JS7_UNDEC(c);
|
|
for (c = *++cp; JS7_ISDEC(c); c = *++cp)
|
|
num = 10 * num - (uintN)JS7_UNDEC(c);
|
|
if (num > 9 && num > state->parenCount) {
|
|
cp = ocp;
|
|
goto do_octal;
|
|
}
|
|
cp--;
|
|
ren = NewRENode(state, REOP_BACKREF, NULL);
|
|
if (!ren)
|
|
return NULL;
|
|
ren->u.num = num - 1; /* \1 is numbered 0, etc. */
|
|
|
|
/* Avoid common chr- and flags-setting code after switch. */
|
|
ren->flags = RENODE_NONEMPTY;
|
|
goto bump_cp;
|
|
|
|
case 'x':
|
|
ocp = cp;
|
|
c = *++cp;
|
|
if (JS7_ISHEX(c)) {
|
|
num = JS7_UNHEX(c);
|
|
c = *++cp;
|
|
if (JS7_ISHEX(c)) {
|
|
num <<= 4;
|
|
num += JS7_UNHEX(c);
|
|
} else {
|
|
cp--; /* back up so cp points to last hex char */
|
|
}
|
|
} else {
|
|
cp = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */
|
|
num = 'x';
|
|
}
|
|
ren = NewRENode(state, REOP_FLAT1, NULL);
|
|
c = (jschar)num;
|
|
break;
|
|
|
|
case 'c':
|
|
c = *++cp;
|
|
if (!JS7_ISLET(c)) {
|
|
cp -= 2;
|
|
ocp = cp;
|
|
goto do_flat;
|
|
}
|
|
c = JS_TOUPPER(c);
|
|
c = JS_TOCTRL(c);
|
|
ren = NewRENode(state, REOP_FLAT1, NULL);
|
|
break;
|
|
|
|
case 'u':
|
|
if (JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
|
|
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) {
|
|
c = (((((JS7_UNHEX(cp[1]) << 4) + JS7_UNHEX(cp[2])) << 4)
|
|
+ JS7_UNHEX(cp[3])) << 4) + JS7_UNHEX(cp[4]);
|
|
cp += 4;
|
|
ren = NewRENode(state, REOP_FLAT1, NULL);
|
|
break;
|
|
}
|
|
|
|
/* Unlike Perl \xZZ, we take \uZZZ to be literal-u then ZZZ. */
|
|
ocp = cp;
|
|
goto do_flat;
|
|
|
|
default:
|
|
ocp = cp;
|
|
goto do_flat;
|
|
}
|
|
|
|
/* Common chr- and flags-setting code for escape opcodes. */
|
|
if (ren) {
|
|
ren->u.chr = c;
|
|
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
|
|
}
|
|
|
|
bump_cp:
|
|
/* Skip to next unparsed char. */
|
|
cp++;
|
|
break;
|
|
|
|
default:
|
|
do_flat:
|
|
while ((c = *++cp) != 0 && !js_strchr(metachars, c))
|
|
;
|
|
len = (uintN)(cp - ocp);
|
|
if (c != 0 && len > 1 && js_strchr(closurechars, c)) {
|
|
cp--;
|
|
len--;
|
|
}
|
|
if (len > REOP_FLATLEN_MAX) {
|
|
len = REOP_FLATLEN_MAX;
|
|
cp = ocp + len;
|
|
}
|
|
ren = NewRENode(state, (len == 1) ? REOP_FLAT1 : REOP_FLAT,
|
|
(void *)ocp);
|
|
if (!ren)
|
|
return NULL;
|
|
ren->flags = RENODE_NONEMPTY;
|
|
if (len > 1) {
|
|
ren->u.kid2 = (void *)cp;
|
|
} else {
|
|
ren->flags |= RENODE_SINGLE;
|
|
ren->u.chr = *ocp;
|
|
}
|
|
break;
|
|
}
|
|
|
|
state->cp = cp;
|
|
return ren;
|
|
}
|
|
|
|
static ptrdiff_t
|
|
CountFirstChars(RENode *alt)
|
|
{
|
|
ptrdiff_t len, sublen;
|
|
RENode *kid;
|
|
jschar c, *ccp, *ccend;
|
|
|
|
len = 0;
|
|
do {
|
|
for (kid = alt->kid; REOP(kid) == REOP_LPAREN; kid = kid->kid)
|
|
;
|
|
switch (REOP(kid)) {
|
|
case REOP_QUANT:
|
|
if (kid->u.range.min == 0)
|
|
return -1;
|
|
/* FALL THROUGH */
|
|
case REOP_PLUS:
|
|
case REOP_ALT:
|
|
sublen = CountFirstChars(kid);
|
|
if (sublen < 0)
|
|
return sublen;
|
|
len += sublen;
|
|
break;
|
|
case REOP_FLAT:
|
|
c = *(jschar *)kid->kid;
|
|
goto count_char;
|
|
case REOP_FLAT1:
|
|
c = kid->u.chr;
|
|
count_char:
|
|
/* Only '\\' and '-' need escaping within a character class. */
|
|
if (c == '\\' || c == '-')
|
|
len += 2;
|
|
else
|
|
len++;
|
|
break;
|
|
case REOP_CCLASS:
|
|
ccp = kid->kid;
|
|
if (*ccp == '^')
|
|
return -1;
|
|
ccend = kid->u.kid2;
|
|
len += ccend - ccp;
|
|
break;
|
|
case REOP_DIGIT:
|
|
case REOP_NONDIGIT:
|
|
case REOP_ALNUM:
|
|
case REOP_NONALNUM:
|
|
case REOP_SPACE:
|
|
case REOP_NONSPACE:
|
|
len += 2;
|
|
break;
|
|
default:
|
|
return -1;
|
|
}
|
|
/* Test for non-alt so quant and plus execute to here only. */
|
|
if (REOP(alt) != REOP_ALT)
|
|
break;
|
|
alt = alt->next;
|
|
} while (alt && REOP(alt) == REOP_ALT);
|
|
return len;
|
|
}
|
|
|
|
static ptrdiff_t
|
|
StoreChar(jschar *cp, ptrdiff_t i, jschar c, JSBool escape)
|
|
{
|
|
ptrdiff_t j;
|
|
|
|
/* Suppress dups to avoid making a flat1 into a cclass. */
|
|
for (j = 0; j < i; j++) {
|
|
if (cp[j] == '\\')
|
|
j++;
|
|
if (cp[j] == c && (!escape || (j > 0 && cp[j-1] == '\\')))
|
|
return i;
|
|
}
|
|
|
|
/* Only '\\' and '-' need escaping within a character class. */
|
|
if (escape || c == '\\' || c == '-')
|
|
cp[i++] = '\\';
|
|
cp[i++] = c;
|
|
return i;
|
|
}
|
|
|
|
static ptrdiff_t
|
|
StoreFirstChars(RENode *alt, jschar *cp, ptrdiff_t i)
|
|
{
|
|
RENode *kid;
|
|
jschar *ccp, *ccend;
|
|
|
|
do {
|
|
for (kid = alt->kid; REOP(kid) == REOP_LPAREN; kid = kid->kid)
|
|
;
|
|
switch (REOP(kid)) {
|
|
case REOP_QUANT:
|
|
PR_ASSERT(kid->u.range.min != 0);
|
|
/* FALL THROUGH */
|
|
case REOP_PLUS:
|
|
case REOP_ALT:
|
|
i = StoreFirstChars(kid, cp, i);
|
|
break;
|
|
case REOP_FLAT:
|
|
i = StoreChar(cp, i, *(jschar *)kid->kid, JS_FALSE);
|
|
break;
|
|
case REOP_FLAT1:
|
|
i = StoreChar(cp, i, kid->u.chr, JS_FALSE);
|
|
break;
|
|
case REOP_CCLASS:
|
|
ccend = kid->u.kid2;
|
|
for (ccp = kid->kid; ccp < ccend; ccp++)
|
|
cp[i++] = *ccp;
|
|
break;
|
|
case REOP_DIGIT:
|
|
i = StoreChar(cp, i, 'd', JS_TRUE);
|
|
break;
|
|
case REOP_NONDIGIT:
|
|
i = StoreChar(cp, i, 'D', JS_TRUE);
|
|
break;
|
|
case REOP_ALNUM:
|
|
i = StoreChar(cp, i, 'w', JS_TRUE);
|
|
break;
|
|
case REOP_NONALNUM:
|
|
i = StoreChar(cp, i, 'W', JS_TRUE);
|
|
break;
|
|
case REOP_SPACE:
|
|
i = StoreChar(cp, i, 's', JS_TRUE);
|
|
break;
|
|
case REOP_NONSPACE:
|
|
i = StoreChar(cp, i, 'S', JS_TRUE);
|
|
break;
|
|
default:
|
|
PR_ASSERT(0);
|
|
}
|
|
/* Test for non-alt so quant and plus execute to here only. */
|
|
if (REOP(alt) != REOP_ALT)
|
|
break;
|
|
alt = alt->next;
|
|
} while (alt && REOP(alt) == REOP_ALT);
|
|
return i;
|
|
}
|
|
|
|
static JSBool
|
|
AnchorRegExp(CompilerState *state, RENode *ren)
|
|
{
|
|
RENode *ren2, *kid;
|
|
ptrdiff_t len;
|
|
jschar *cp;
|
|
REOp op;
|
|
|
|
for (ren2 = ren; REOP(ren2) == REOP_LPAREN; ren2 = ren2->kid)
|
|
;
|
|
switch (REOP(ren2)) {
|
|
case REOP_ALT:
|
|
len = CountFirstChars(ren2);
|
|
if (len <= 0)
|
|
goto do_anchor;
|
|
PR_ARENA_ALLOCATE(cp, &state->context->tempPool, len * sizeof(jschar));
|
|
if (!cp) {
|
|
JS_ReportOutOfMemory(state->context);
|
|
return JS_FALSE;
|
|
}
|
|
|
|
len = StoreFirstChars(ren2, cp, 0);
|
|
if (len == 1) {
|
|
op = REOP_FLAT1;
|
|
} else if (len == 2 && *cp == '\\') {
|
|
switch (cp[1]) {
|
|
case '\\':
|
|
case '-':
|
|
/* No need for a character class if just '\\' or '-'. */
|
|
cp++;
|
|
op = REOP_FLAT1;
|
|
break;
|
|
case 'd':
|
|
op = REOP_DIGIT;
|
|
break;
|
|
case 'D':
|
|
op = REOP_NONDIGIT;
|
|
break;
|
|
case 'w':
|
|
op = REOP_ALNUM;
|
|
break;
|
|
case 'W':
|
|
op = REOP_NONALNUM;
|
|
break;
|
|
case 's':
|
|
op = REOP_SPACE;
|
|
break;
|
|
case 'S':
|
|
op = REOP_NONSPACE;
|
|
break;
|
|
default:
|
|
op = REOP_CCLASS;
|
|
break;
|
|
}
|
|
} else {
|
|
op = REOP_CCLASS;
|
|
}
|
|
|
|
do_first_char:
|
|
kid = NewRENode(state, op, cp);
|
|
if (!kid)
|
|
return JS_FALSE;
|
|
kid->flags = RENODE_SINGLE | RENODE_NONEMPTY;
|
|
if (op == REOP_FLAT1)
|
|
kid->u.chr = *cp;
|
|
else if (op == REOP_CCLASS)
|
|
kid->u.kid2 = cp + len;
|
|
|
|
ren2 = NewRENode(state, REOP(ren), ren->kid);
|
|
if (!ren2)
|
|
return JS_FALSE;
|
|
ren2->flags = ren->flags | RENODE_ISNEXT;
|
|
ren2->next = ren->next;
|
|
ren2->u = ren->u;
|
|
|
|
ren->op = REOP_ANCHOR1;
|
|
ren->flags = RENODE_GOODNEXT;
|
|
ren->next = ren2;
|
|
ren->kid = kid;
|
|
ren->u.kid2 = NULL;
|
|
break;
|
|
|
|
case REOP_FLAT:
|
|
cp = ren2->kid;
|
|
op = REOP_FLAT1;
|
|
goto do_first_char;
|
|
|
|
case REOP_FLAT1:
|
|
cp = &ren2->u.chr;
|
|
op = REOP_FLAT1;
|
|
goto do_first_char;
|
|
|
|
case REOP_DOTSTAR:
|
|
/*
|
|
* ".*" is anchored by definition when at the front of a list.
|
|
*/
|
|
break;
|
|
|
|
default:
|
|
do_anchor:
|
|
/*
|
|
* Any node other than dotstar that's unanchored and nonempty must be
|
|
* prefixed by REOP_ANCHOR.
|
|
*/
|
|
PR_ASSERT(REOP(ren2) != REOP_ANCHOR);
|
|
PR_ASSERT(!(ren2->flags & RENODE_ISNEXT));
|
|
if ((ren2->flags & (RENODE_ANCHORED | RENODE_NONEMPTY))
|
|
== RENODE_NONEMPTY) {
|
|
ren2 = NewRENode(state, REOP(ren), ren->kid);
|
|
if (!ren2)
|
|
return JS_FALSE;
|
|
ren2->flags = ren->flags | RENODE_ISNEXT;
|
|
ren2->next = ren->next;
|
|
ren2->u = ren->u;
|
|
ren->op = REOP_ANCHOR;
|
|
ren->flags = RENODE_GOODNEXT;
|
|
ren->next = ren2;
|
|
ren->kid = ren->u.kid2 = NULL;
|
|
}
|
|
break;
|
|
}
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static RENode *
|
|
CloseTail(CompilerState *state, RENode *alt1, RENode *next)
|
|
{
|
|
RENode *alt2, *empty;
|
|
|
|
empty = NewRENode(state, REOP_EMPTY, NULL);
|
|
alt2 = NewRENode(state, REOP_ALT, empty);
|
|
if (!alt2 || !empty)
|
|
return NULL;
|
|
alt1->next = alt2;
|
|
alt2->next = empty->next = next;
|
|
if (alt1->flags & RENODE_GOODNEXT)
|
|
alt2->flags |= RENODE_GOODNEXT;
|
|
else
|
|
alt1->flags |= RENODE_GOODNEXT;
|
|
alt2->flags |= RENODE_ISNEXT;
|
|
return alt2;
|
|
}
|
|
|
|
static JSBool
|
|
OptimizeRegExp(CompilerState *state, RENode *ren)
|
|
{
|
|
RENode *kid, *next, *jump, *alt1;
|
|
uintN flag;
|
|
jschar c, c2, maxc, *cp, *cp2;
|
|
ptrdiff_t len, len2;
|
|
size_t size, incr;
|
|
JSContext *cx;
|
|
JSBool reallok;
|
|
|
|
do {
|
|
switch (REOP(ren)) {
|
|
case REOP_STAR:
|
|
kid = ren->kid;
|
|
if (!(kid->flags & RENODE_SINGLE)) {
|
|
/*
|
|
* If kid is not simple, deoptimize <kid>* as follows (the |__|
|
|
* are byte placeholders for next/jump offsets):
|
|
*
|
|
* FROM: |STAR|<kid>|
|
|
*
|
|
* +-----------------------+
|
|
* V |
|
|
* TO: |ALT|__|__|<kid>|JUMP|__|__|ALT|__|__|EMPTY|
|
|
* | ^ | ^
|
|
* +-------------------+ +--------+
|
|
*/
|
|
ren->op = REOP_ALT;
|
|
next = ren->next;
|
|
jump = NewRENode(state, REOP_JUMP, NULL);
|
|
if (!jump || !FixNext(state, kid, jump, next))
|
|
return JS_FALSE;
|
|
jump->next = ren;
|
|
if (ren->flags & RENODE_ISNEXT)
|
|
ren->flags |= RENODE_ISJOIN;
|
|
if (!CloseTail(state, ren, next))
|
|
return JS_FALSE;
|
|
}
|
|
break;
|
|
|
|
case REOP_PLUS:
|
|
kid = ren->kid;
|
|
if (!(kid->flags & RENODE_SINGLE)) {
|
|
/*
|
|
* FROM: |PLUS|<kid>|
|
|
*
|
|
* +-----------------------+
|
|
* V |
|
|
* TO: |<kid>|ALT|__|__|JUMP|__|__|ALT|__|__|EMPTY|
|
|
* | ^ | ^
|
|
* +-------------+ +--------+
|
|
*/
|
|
next = ren->next;
|
|
flag = (ren->flags & RENODE_GOODNEXT);
|
|
*ren = *kid;
|
|
jump = NewRENode(state, REOP_JUMP, NULL);
|
|
alt1 = NewRENode(state, REOP_ALT, jump);
|
|
if (!alt1 || !jump || !FixNext(state, ren, alt1, next))
|
|
return JS_FALSE;
|
|
alt1->flags |= flag;
|
|
jump->next = ren;
|
|
if (ren->flags & RENODE_ISNEXT)
|
|
ren->flags |= RENODE_ISJOIN;
|
|
if (!CloseTail(state, alt1, next))
|
|
return JS_FALSE;
|
|
}
|
|
break;
|
|
|
|
case REOP_OPT:
|
|
kid = ren->kid;
|
|
if (!(kid->flags & RENODE_SINGLE)) {
|
|
/*
|
|
* FROM: |OPT|<kid>|
|
|
*
|
|
* +------------------+
|
|
* | v
|
|
* TO: |ALT|__|__|<kid>|JUMP|__|__|ALT|__|__|EMPTY|
|
|
* | ^ | ^
|
|
* +-------------------+ +--------+
|
|
*/
|
|
ren->op = REOP_ALT;
|
|
next = ren->next;
|
|
jump = NewRENode(state, REOP_JUMP, NULL);
|
|
if (!jump || !FixNext(state, kid, jump, next))
|
|
return JS_FALSE;
|
|
jump->next = next;
|
|
if (!CloseTail(state, ren, next))
|
|
return JS_FALSE;
|
|
next->flags |= RENODE_ISJOIN;
|
|
}
|
|
break;
|
|
|
|
case REOP_FLAT:
|
|
/*
|
|
* Coalesce adjacent FLAT and FLAT1 nodes. Also coalesce FLAT and
|
|
* FLAT, which can result from deleting a coalesced FLAT1.
|
|
*/
|
|
while ((next = ren->next) != NULL &&
|
|
!(next->flags & RENODE_ISJOIN) &&
|
|
(REOP(next) == REOP_FLAT || REOP(next) == REOP_FLAT1)) {
|
|
if (REOP(next) == REOP_FLAT) {
|
|
cp2 = next->kid;
|
|
len2 = PTRDIFF((jschar *)next->u.kid2, cp2, jschar);
|
|
} else {
|
|
cp2 = &next->u.chr;
|
|
len2 = 1;
|
|
}
|
|
cp = ren->kid;
|
|
len = PTRDIFF((jschar *)ren->u.kid2, cp, jschar);
|
|
if (len + len2 > REOP_FLATLEN_MAX)
|
|
break;
|
|
cx = state->context;
|
|
reallok = ren->flags & RENODE_REALLOK;
|
|
if (reallok) {
|
|
/* Try to extend the last alloc, to fuse FLAT,FLAT1,... */
|
|
size = (len + 1) * sizeof(jschar);
|
|
incr = len2 * sizeof(jschar);
|
|
PR_ARENA_GROW(cp, &cx->tempPool, size, incr);
|
|
} else {
|
|
size = (len + len2 + 1) * sizeof(jschar);
|
|
PR_ARENA_ALLOCATE(cp, &cx->tempPool, size);
|
|
}
|
|
if (!cp) {
|
|
JS_ReportOutOfMemory(cx);
|
|
return JS_FALSE;
|
|
}
|
|
if (!reallok) {
|
|
js_strncpy(cp, ren->kid, len);
|
|
ren->flags |= RENODE_REALLOK;
|
|
}
|
|
js_strncpy(&cp[len], cp2, len2);
|
|
len += len2;
|
|
cp[len] = 0;
|
|
end_coalesce:
|
|
ren->kid = cp;
|
|
PR_ASSERT(ren->flags & RENODE_GOODNEXT);
|
|
if (!(next->flags & RENODE_GOODNEXT))
|
|
ren->flags &= ~RENODE_GOODNEXT;
|
|
ren->u.kid2 = cp + len;
|
|
ren->next = next->next;
|
|
next->op = REOP_EMPTY; /* next should be unreachable! */
|
|
}
|
|
break;
|
|
|
|
case REOP_FLAT1:
|
|
/*
|
|
* Coalesce adjacent FLAT1 nodes. Also coalesce FLAT1 and FLAT.
|
|
* After a single coalesce, we reuse the REOP_FLAT case's code by
|
|
* jumping into the bottom of its while loop.
|
|
*/
|
|
next = ren->next;
|
|
if (next &&
|
|
!(next->flags & RENODE_ISJOIN) &&
|
|
(REOP(next) == REOP_FLAT || REOP(next) == REOP_FLAT1)) {
|
|
if (REOP(next) == REOP_FLAT) {
|
|
cp2 = next->kid;
|
|
len = PTRDIFF((jschar *)next->u.kid2, cp2, jschar);
|
|
} else {
|
|
cp2 = &next->u.chr;
|
|
len = 1;
|
|
}
|
|
cx = state->context;
|
|
PR_ARENA_ALLOCATE(cp, &cx->tempPool, (len+2) * sizeof(jschar));
|
|
if (!cp) {
|
|
JS_ReportOutOfMemory(cx);
|
|
return JS_FALSE;
|
|
}
|
|
cp[0] = ren->u.chr;
|
|
js_strncpy(&cp[1], cp2, len);
|
|
cp[++len] = 0;
|
|
ren->op = REOP_FLAT;
|
|
ren->flags |= RENODE_REALLOK;
|
|
goto end_coalesce;
|
|
}
|
|
break;
|
|
|
|
default:;
|
|
}
|
|
|
|
/*
|
|
* Set ren's offset and advance progLength by ren's base size.
|
|
*/
|
|
ren->offset = (uint16) state->progLength;
|
|
state->progLength += reopsize[ren->op];
|
|
|
|
switch (REOP(ren)) {
|
|
case REOP_ALT:
|
|
case REOP_QUANT:
|
|
case REOP_STAR:
|
|
case REOP_PLUS:
|
|
case REOP_OPT:
|
|
case REOP_LPAREN:
|
|
case REOP_ANCHOR1:
|
|
/*
|
|
* Recur for nodes that have kid links to other nodes.
|
|
*/
|
|
if (!OptimizeRegExp(state, ren->kid))
|
|
return JS_FALSE;
|
|
break;
|
|
|
|
case REOP_CCLASS:
|
|
/*
|
|
* Check for a nonzero high byte or a \uXXXX escape sequence.
|
|
*/
|
|
cp = ren->kid;
|
|
cp2 = ren->u.kid2;
|
|
len = PTRDIFF(cp2, cp, jschar);
|
|
maxc = 0;
|
|
while (cp < cp2) {
|
|
c = *cp++;
|
|
if (c == '\\') {
|
|
if (cp + 5 <= cp2 && *cp == 'u' &&
|
|
JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
|
|
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) {
|
|
c = (((((JS7_UNHEX(cp[1]) << 4)
|
|
+ JS7_UNHEX(cp[2])) << 4)
|
|
+ JS7_UNHEX(cp[3])) << 4)
|
|
+ JS7_UNHEX(cp[4]);
|
|
cp += 5;
|
|
} else {
|
|
/*
|
|
* Octal and hex escapes can't be > 255. Skip this
|
|
* backslash and let the loop pass over the remaining
|
|
* escape sequence as if it were text to match.
|
|
*/
|
|
continue;
|
|
}
|
|
}
|
|
if (state->flags & JSREG_FOLD) {
|
|
/*
|
|
* Don't assume that lowercase are above uppercase, or
|
|
* that c is either even when c has upper and lowercase
|
|
* versions.
|
|
*/
|
|
if ((c2 = JS_TOUPPER(c)) > maxc)
|
|
maxc = c2;
|
|
if ((c2 = JS_TOLOWER(c2)) > maxc)
|
|
maxc = c2;
|
|
}
|
|
if (c > maxc)
|
|
maxc = c;
|
|
}
|
|
if (maxc >= CCLASS_CHARSET_SIZE) {
|
|
ren->op = (uint8)REOP_UCCLASS;
|
|
size = (size_t)(maxc + PR_BITS_PER_BYTE) / PR_BITS_PER_BYTE;
|
|
ren->u.ucclass.kidlen = (uint16)len;
|
|
ren->u.ucclass.bmsize = (uint16)size;
|
|
state->progLength -= reopsize[REOP_CCLASS];
|
|
state->progLength += reopsize[REOP_UCCLASS] + size;
|
|
}
|
|
break;
|
|
|
|
case REOP_FLAT:
|
|
/*
|
|
* FLAT takes 2 bytes plus the bytes in the string to match.
|
|
* If any character has a non-zero high byte, switch to UCFLAT
|
|
* and double the immediate operand length.
|
|
*/
|
|
cp = ren->kid;
|
|
cp2 = ren->u.kid2;
|
|
len = PTRDIFF(cp2, cp, jschar);
|
|
while (cp < cp2) {
|
|
c = *cp++;
|
|
if (c >> 8) {
|
|
ren->op = (uint8)REOP_UCFLAT;
|
|
len *= 2;
|
|
break;
|
|
}
|
|
}
|
|
state->progLength += len;
|
|
break;
|
|
|
|
case REOP_FLAT1:
|
|
c = ren->u.chr;
|
|
if (c >> 8) {
|
|
ren->op = (uint8)REOP_UCFLAT1;
|
|
state->progLength++;
|
|
}
|
|
break;
|
|
|
|
case REOP_JUMP:
|
|
/*
|
|
* Eliminate jumps to jumps.
|
|
*/
|
|
while ((next = ren->next) != NULL && REOP(next) == REOP_JUMP)
|
|
ren->next = next->next;
|
|
break;
|
|
|
|
case REOP_END:
|
|
/*
|
|
* End of program.
|
|
*/
|
|
return JS_TRUE;
|
|
|
|
default:;
|
|
}
|
|
|
|
if (!(ren->flags & RENODE_GOODNEXT))
|
|
break;
|
|
} while ((ren = ren->next) != NULL);
|
|
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSBool
|
|
EmitRegExp(CompilerState *state, RENode *ren, JSRegExp *re)
|
|
{
|
|
REOp op;
|
|
jsbytecode *pc, fill;
|
|
RENode *next;
|
|
ptrdiff_t diff;
|
|
jschar *cp, *end, *ocp;
|
|
uintN b, c, i, j, n, lastc, foldc, nchars;
|
|
JSBool inrange;
|
|
|
|
do {
|
|
op = REOP(ren);
|
|
if (op == REOP_END)
|
|
return JS_TRUE;
|
|
|
|
pc = &re->program[state->progLength];
|
|
state->progLength += reopsize[ren->op];
|
|
pc[0] = ren->op;
|
|
next = ren->next;
|
|
|
|
switch (op) {
|
|
case REOP_ALT:
|
|
diff = next->offset - ren->offset;
|
|
SET_JUMP_OFFSET(pc, diff);
|
|
if (!EmitRegExp(state, ren->kid, re))
|
|
return JS_FALSE;
|
|
break;
|
|
|
|
case REOP_QUANT:
|
|
diff = next->offset - ren->offset;
|
|
SET_JUMP_OFFSET(pc, diff);
|
|
pc += 2;
|
|
SET_ARGNO(pc, ren->u.range.min);
|
|
pc += 2;
|
|
SET_ARGNO(pc, ren->u.range.max);
|
|
if (!EmitRegExp(state, ren->kid, re))
|
|
return JS_FALSE;
|
|
break;
|
|
|
|
case REOP_STAR:
|
|
case REOP_PLUS:
|
|
case REOP_OPT:
|
|
case REOP_ANCHOR1:
|
|
if (!EmitRegExp(state, ren->kid, re))
|
|
return JS_FALSE;
|
|
break;
|
|
|
|
case REOP_LPAREN:
|
|
SET_ARGNO(pc, ren->u.num);
|
|
if (!EmitRegExp(state, ren->kid, re))
|
|
return JS_FALSE;
|
|
break;
|
|
|
|
case REOP_RPAREN:
|
|
SET_ARGNO(pc, ren->u.num);
|
|
break;
|
|
|
|
case REOP_CCLASS:
|
|
case REOP_UCCLASS:
|
|
cp = ren->kid;
|
|
if (*cp == '^') {
|
|
pc[0] = (jsbytecode)
|
|
((op == REOP_CCLASS) ? REOP_NCCLASS : REOP_NUCCLASS);
|
|
fill = 0xff;
|
|
cp++;
|
|
} else {
|
|
fill = 0;
|
|
}
|
|
pc++;
|
|
if (op == REOP_CCLASS) {
|
|
end = ren->u.kid2;
|
|
for (i = 0; i < CCLASS_CHARSET_SIZE / PR_BITS_PER_BYTE; i++)
|
|
pc[i] = fill;
|
|
nchars = CCLASS_CHARSET_SIZE;
|
|
} else {
|
|
end = cp + ren->u.ucclass.kidlen;
|
|
n = (uintN)ren->u.ucclass.bmsize;
|
|
*pc++ = (jsbytecode)(n >> 8);
|
|
*pc++ = (jsbytecode)n;
|
|
state->progLength += n;
|
|
for (i = 0; i < n; i++)
|
|
pc[i] = fill;
|
|
nchars = n * PR_BITS_PER_BYTE;
|
|
}
|
|
|
|
/* Split ops up into statements to keep MSVC1.52 from crashing. */
|
|
#define MATCH_BIT(c) { i = (c) >> 3; b = (c) & 7; b = 1 << b; \
|
|
if (fill) pc[i] &= ~b; else pc[i] |= b; }
|
|
|
|
lastc = nchars;
|
|
inrange = JS_FALSE;
|
|
|
|
while (cp < end) {
|
|
c = (uintN) *cp++;
|
|
if (c == '\\') {
|
|
c = *cp++;
|
|
switch (c) {
|
|
case 'b':
|
|
case 'f':
|
|
case 'n':
|
|
case 'r':
|
|
case 't':
|
|
case 'v':
|
|
c = js_strchr(js_EscapeMap, (jschar)c)[-1];
|
|
break;
|
|
|
|
#define CHECK_RANGE() if (inrange) { MATCH_BIT(lastc); MATCH_BIT('-'); \
|
|
inrange = JS_FALSE; }
|
|
|
|
case 'd':
|
|
CHECK_RANGE();
|
|
for (c = '0'; c <= '9'; c++)
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
case 'D':
|
|
CHECK_RANGE();
|
|
for (c = 0; c < '0'; c++)
|
|
MATCH_BIT(c);
|
|
for (c = '9' + 1; c < nchars; c++)
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
case 'w':
|
|
CHECK_RANGE();
|
|
for (c = 0; c < nchars; c++)
|
|
if (JS_ISWORD(c))
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
case 'W':
|
|
CHECK_RANGE();
|
|
for (c = 0; c < nchars; c++)
|
|
if (!JS_ISWORD(c))
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
case 's':
|
|
CHECK_RANGE();
|
|
for (c = 0; c < nchars; c++)
|
|
if (JS_ISSPACE(c))
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
case 'S':
|
|
CHECK_RANGE();
|
|
for (c = 0; c < nchars; c++)
|
|
if (!JS_ISSPACE(c))
|
|
MATCH_BIT(c);
|
|
continue;
|
|
|
|
#undef CHECK_RANGE
|
|
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
n = JS7_UNDEC(c);
|
|
ocp = cp - 2;
|
|
c = *cp;
|
|
if ('0' <= c && c <= '7') {
|
|
cp++;
|
|
n = 8 * n + JS7_UNDEC(c);
|
|
|
|
c = *cp;
|
|
if ('0' <= c && c <= '7') {
|
|
cp++;
|
|
i = 8 * n + JS7_UNDEC(c);
|
|
if (i <= 0377)
|
|
n = i;
|
|
else
|
|
cp--;
|
|
}
|
|
}
|
|
c = n;
|
|
break;
|
|
|
|
case 'x':
|
|
ocp = cp;
|
|
c = *cp++;
|
|
if (JS7_ISHEX(c)) {
|
|
n = JS7_UNHEX(c);
|
|
c = *cp++;
|
|
if (JS7_ISHEX(c)) {
|
|
n <<= 4;
|
|
n += JS7_UNHEX(c);
|
|
}
|
|
} else {
|
|
cp = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */
|
|
n = 'x';
|
|
}
|
|
c = n;
|
|
break;
|
|
|
|
case 'u':
|
|
if (JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
|
|
JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
|
|
n = (((((JS7_UNHEX(cp[0]) << 4)
|
|
+ JS7_UNHEX(cp[1])) << 4)
|
|
+ JS7_UNHEX(cp[2])) << 4)
|
|
+ JS7_UNHEX(cp[3]);
|
|
c = n;
|
|
cp += 4;
|
|
}
|
|
break;
|
|
|
|
case 'c':
|
|
c = *cp++;
|
|
c = JS_TOUPPER(c);
|
|
c = JS_TOCTRL(c);
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (inrange) {
|
|
if (lastc > c) {
|
|
JS_ReportErrorNumber(state->context,
|
|
js_GetErrorMessage, NULL,
|
|
JSMSG_BAD_CLASS_RANGE);
|
|
return JS_FALSE;
|
|
}
|
|
inrange = JS_FALSE;
|
|
} else {
|
|
/* Set lastc so we match just c's bit in the for loop. */
|
|
lastc = c;
|
|
|
|
/* [balance: */
|
|
if (*cp == '-' && cp + 1 < end && cp[1] != ']') {
|
|
cp++;
|
|
inrange = JS_TRUE;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
/* Match characters in the range [lastc, c]. */
|
|
for (; lastc <= c; lastc++) {
|
|
MATCH_BIT(lastc);
|
|
if (state->flags & JSREG_FOLD) {
|
|
/*
|
|
* Must do both upper and lower for Turkish dotless i,
|
|
* Georgian, etc.
|
|
*/
|
|
foldc = JS_TOUPPER(lastc);
|
|
MATCH_BIT(foldc);
|
|
foldc = JS_TOLOWER(foldc);
|
|
MATCH_BIT(foldc);
|
|
}
|
|
}
|
|
lastc = c;
|
|
}
|
|
|
|
#undef MATCH_BIT
|
|
break;
|
|
|
|
case REOP_BACKREF:
|
|
if (state->flags & JSREG_FOLD)
|
|
pc[0] = (jsbytecode)REOP_BACKREFi;
|
|
pc[1] = (jsbytecode)ren->u.num;
|
|
break;
|
|
|
|
case REOP_FLAT:
|
|
if (state->flags & JSREG_FOLD)
|
|
pc[0] = (jsbytecode)REOP_FLATi;
|
|
goto emit_flat;
|
|
|
|
case REOP_UCFLAT:
|
|
if (state->flags & JSREG_FOLD)
|
|
pc[0] = (jsbytecode)REOP_UCFLATi;
|
|
emit_flat:
|
|
cp = ren->kid;
|
|
diff = (jschar *)ren->u.kid2 - cp;
|
|
pc[1] = (jsbytecode)diff;
|
|
pc += 2;
|
|
state->progLength += diff;
|
|
if (op == REOP_UCFLAT)
|
|
state->progLength += diff;
|
|
for (i = j = 0; i < (uintN)diff; i++, j++) {
|
|
c = (uintN)cp[i];
|
|
|
|
/*
|
|
* Lay down immediate chars in native byte order so memcmp
|
|
* with a JSString's chars works.
|
|
*/
|
|
#if IS_BIG_ENDIAN
|
|
if (op == REOP_UCFLAT)
|
|
pc[j++] = (jsbytecode)(c >> 8);
|
|
#endif
|
|
pc[j] = (jsbytecode)c;
|
|
#if IS_LITTLE_ENDIAN
|
|
if (op == REOP_UCFLAT)
|
|
pc[j++] = (jsbytecode)(c >> 8);
|
|
#endif
|
|
}
|
|
break;
|
|
|
|
case REOP_FLAT1:
|
|
if (state->flags & JSREG_FOLD)
|
|
pc[0] = (jsbytecode)REOP_FLAT1i;
|
|
pc[1] = (jsbytecode)ren->u.chr;
|
|
break;
|
|
|
|
case REOP_UCFLAT1:
|
|
if (state->flags & JSREG_FOLD)
|
|
pc[0] = (jsbytecode)REOP_UCFLAT1i;
|
|
c = (uintN)ren->u.chr;
|
|
pc[1] = (jsbytecode)(c >> 8);
|
|
pc[2] = (jsbytecode)c;
|
|
break;
|
|
|
|
case REOP_JUMP:
|
|
diff = next->offset - ren->offset;
|
|
SET_JUMP_OFFSET(pc, diff);
|
|
break;
|
|
|
|
default:;
|
|
}
|
|
|
|
if (!(ren->flags & RENODE_GOODNEXT))
|
|
break;
|
|
} while ((ren = next) != NULL);
|
|
return JS_TRUE;
|
|
}
|
|
|
|
JSRegExp *
|
|
js_NewRegExp(JSContext *cx, JSString *str, uintN flags)
|
|
{
|
|
JSRegExp *re;
|
|
void *mark;
|
|
CompilerState state;
|
|
RENode *ren, *end;
|
|
size_t resize;
|
|
|
|
re = NULL;
|
|
mark = PR_ARENA_MARK(&cx->tempPool);
|
|
|
|
state.context = cx;
|
|
state.cpbegin = state.cp = str->chars;
|
|
state.flags = flags;
|
|
state.parenCount = 0;
|
|
state.progLength = 0;
|
|
|
|
ren = ParseRegExp(&state);
|
|
if (!ren)
|
|
goto out;
|
|
|
|
end = NewRENode(&state, REOP_END, NULL);
|
|
if (!end || !SetNext(&state, ren, end))
|
|
goto out;
|
|
|
|
if (!AnchorRegExp(&state, ren))
|
|
goto out;
|
|
if (!OptimizeRegExp(&state, ren))
|
|
goto out;
|
|
|
|
#ifdef DEBUG_notme
|
|
if (!DumpRegExp(cx, ren))
|
|
goto out;
|
|
#endif
|
|
|
|
resize = sizeof *re + state.progLength - 1;
|
|
re = JS_malloc(cx, PR_ROUNDUP(resize, sizeof(prword)));
|
|
if (!re)
|
|
goto out;
|
|
re->source = str;
|
|
re->length = state.progLength;
|
|
re->lastIndex = 0;
|
|
re->parenCount = state.parenCount;
|
|
re->flags = flags;
|
|
|
|
state.progLength = 0;
|
|
if (!EmitRegExp(&state, ren, re)) {
|
|
js_DestroyRegExp(cx, re);
|
|
re = NULL;
|
|
goto out;
|
|
}
|
|
|
|
#ifdef DEBUG_notme
|
|
{
|
|
/* print the compiled regexp program bytecode */
|
|
size_t i;
|
|
for (i = 0; i < state.progLength; i++) {
|
|
int b = (int) re->program[i];
|
|
fprintf(stderr, "%d", b);
|
|
if ((i > 0 && i % 8 == 0) || i == state.progLength-1)
|
|
fprintf(stderr, "\n");
|
|
else
|
|
fprintf(stderr, ", ");
|
|
}
|
|
fprintf(stderr, "\n");
|
|
}
|
|
#endif
|
|
|
|
/* Success: lock re->source string. */
|
|
(void) js_LockGCThing(cx, str);
|
|
out:
|
|
PR_ARENA_RELEASE(&cx->tempPool, mark);
|
|
return re;
|
|
}
|
|
|
|
JSRegExp *
|
|
js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt)
|
|
{
|
|
uintN flags;
|
|
jschar *cp;
|
|
|
|
flags = 0;
|
|
if (opt) {
|
|
for (cp = opt->chars; *cp; cp++) {
|
|
switch (*cp) {
|
|
case 'g':
|
|
flags |= JSREG_GLOB;
|
|
break;
|
|
case 'i':
|
|
flags |= JSREG_FOLD;
|
|
break;
|
|
default: {
|
|
char charBuf[2] = " ";
|
|
charBuf[0] = (char)*cp;
|
|
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
|
|
JSMSG_BAD_FLAG, charBuf);
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return js_NewRegExp(cx, str, flags);
|
|
}
|
|
|
|
void
|
|
js_DestroyRegExp(JSContext *cx, JSRegExp *re)
|
|
{
|
|
js_UnlockGCThing(cx, re->source);
|
|
JS_free(cx, re);
|
|
}
|
|
|
|
typedef struct MatchState {
|
|
JSContext *context; /* for access to regExpStatics */
|
|
JSBool anchoring; /* true if multiline anchoring ^/$ */
|
|
jsbytecode *pcend; /* pc limit (fencepost) */
|
|
const jschar *cpbegin, *cpend; /* cp base address and limit */
|
|
size_t start; /* offset from cpbegin to start at */
|
|
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
|
|
uintN parenCount; /* number of paren substring matches */
|
|
JSSubString *maybeParens; /* possible paren substring pointers */
|
|
JSSubString *parens; /* certain paren substring matches */
|
|
} MatchState;
|
|
|
|
/*
|
|
* Returns updated cp on match, null on mismatch.
|
|
*/
|
|
static const jschar *
|
|
MatchRegExp(MatchState *state, jsbytecode *pc, const jschar *cp)
|
|
{
|
|
jsbytecode *pc2, *pcend;
|
|
const jschar *cp2, *cp3, *cpbegin, *cpend;
|
|
REOp op;
|
|
JSBool matched;
|
|
ptrdiff_t i, oplen, altlen, matchlen;
|
|
uintN min, max, num;
|
|
JSSubString *parsub;
|
|
const jschar *parstr;
|
|
size_t parlen;
|
|
jschar c, c2;
|
|
uintN bit, byte, size;
|
|
|
|
pcend = state->pcend;
|
|
cpbegin = state->cpbegin;
|
|
cpend = state->cpend;
|
|
|
|
while (pc < pcend) {
|
|
op = (REOp) *pc;
|
|
oplen = reopsize[op];
|
|
|
|
switch (op) {
|
|
case REOP_EMPTY:
|
|
pc += oplen;
|
|
continue;
|
|
|
|
case REOP_ALT:
|
|
altlen = GET_JUMP_OFFSET(pc);
|
|
pc2 = pc + oplen;
|
|
if ((REOp)pc[altlen] != REOP_ALT) {
|
|
pc = pc2;
|
|
continue;
|
|
}
|
|
cp2 = MatchRegExp(state, pc2, cp);
|
|
if (cp2)
|
|
return cp2;
|
|
pc += altlen;
|
|
continue;
|
|
|
|
case REOP_BOL:
|
|
matched = (cp == cpbegin);
|
|
if (state->context->regExpStatics.multiline) {
|
|
/* Anchor-search only if RegExp.multiline is true. */
|
|
if (state->anchoring) {
|
|
if (!matched)
|
|
matched = (cp[-1] == '\n');
|
|
} else {
|
|
state->anchoring = JS_TRUE;
|
|
for (cp2 = cp; cp2 < cpend; cp2++) {
|
|
if (cp2 == cpbegin || cp2[-1] == '\n') {
|
|
cp3 = MatchRegExp(state, pc, cp2);
|
|
if (cp3) {
|
|
state->skipped = cp2 - cp;
|
|
state->anchoring = JS_FALSE;
|
|
return cp3;
|
|
}
|
|
}
|
|
}
|
|
state->anchoring = JS_FALSE;
|
|
}
|
|
}
|
|
matchlen = 0;
|
|
break;
|
|
|
|
case REOP_EOL:
|
|
case REOP_EOLONLY:
|
|
matched = (cp == cpend);
|
|
if (op == REOP_EOL || state->anchoring) {
|
|
if (!matched && state->context->regExpStatics.multiline)
|
|
matched = (*cp == '\n');
|
|
} else {
|
|
/* Always anchor-search EOLONLY, which has no BOL analogue. */
|
|
state->anchoring = JS_TRUE;
|
|
for (cp2 = cp; cp2 <= cpend; cp2++) {
|
|
if (cp2 == cpend || *cp2 == '\n') {
|
|
cp3 = MatchRegExp(state, pc, cp2);
|
|
if (cp3) {
|
|
state->anchoring = JS_FALSE;
|
|
state->skipped = cp2 - cp;
|
|
return cp3;
|
|
}
|
|
}
|
|
}
|
|
state->anchoring = JS_FALSE;
|
|
}
|
|
matchlen = 0;
|
|
break;
|
|
|
|
case REOP_WBDRY:
|
|
matched = (cp == cpbegin || !JS_ISWORD(cp[-1])) ^ !JS_ISWORD(*cp);
|
|
matchlen = 0;
|
|
break;
|
|
|
|
case REOP_WNONBDRY:
|
|
matched = (cp == cpbegin || !JS_ISWORD(cp[-1])) ^ JS_ISWORD(*cp);
|
|
matchlen = 0;
|
|
break;
|
|
|
|
case REOP_QUANT:
|
|
pc2 = pc;
|
|
oplen = GET_JUMP_OFFSET(pc2);
|
|
pc2 += 2;
|
|
min = GET_ARGNO(pc2);
|
|
pc2 += 2;
|
|
max = GET_ARGNO(pc2);
|
|
pc2 += 3;
|
|
|
|
/* Reduce state->pcend so we match only the quantified regexp. */
|
|
state->pcend = pc + oplen;
|
|
|
|
/* If min is non-zero, insist on at least that many matches. */
|
|
for (num = 0; num < min; num++) {
|
|
cp = MatchRegExp(state, pc2, cp);
|
|
if (!cp) {
|
|
state->pcend = pcend;
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
/* Try matches from min to max, or forever if max == 0. */
|
|
for (; !max || num < max; num++) {
|
|
cp2 = MatchRegExp(state, pc2, cp);
|
|
if (!cp2)
|
|
break;
|
|
cp = cp2;
|
|
}
|
|
|
|
/* Restore state->pcend and set match and matchlen. */
|
|
state->pcend = pcend;
|
|
matched = (min <= num && (!max || num <= max));
|
|
matchlen = 0;
|
|
break;
|
|
|
|
case REOP_LPAREN:
|
|
num = GET_ARGNO(pc);
|
|
parsub = &state->maybeParens[num];
|
|
parstr = parsub->chars;
|
|
parsub->chars = cp;
|
|
pc += oplen;
|
|
cp3 = MatchRegExp(state, pc, cp);
|
|
if (!cp3) {
|
|
/* Restore so later backrefs work, unlike Perl4. */
|
|
parsub->chars = parstr;
|
|
return NULL;
|
|
}
|
|
parsub = &state->parens[num];
|
|
if (!parsub->chars) {
|
|
cp2 = cpbegin + state->start + state->skipped;
|
|
if (cp < cp2) {
|
|
parsub->chars = cp2;
|
|
parsub->length -= cp2 - cp;
|
|
} else {
|
|
parsub->chars = cp;
|
|
}
|
|
}
|
|
return cp3;
|
|
|
|
case REOP_RPAREN:
|
|
num = GET_ARGNO(pc);
|
|
parsub = &state->maybeParens[num];
|
|
parsub->length = parlen = cp - parsub->chars;
|
|
pc += oplen;
|
|
cp = MatchRegExp(state, pc, cp);
|
|
if (cp) {
|
|
parsub = &state->parens[num];
|
|
if (!parsub->chars)
|
|
parsub->length = parlen;
|
|
if (num >= state->parenCount)
|
|
state->parenCount = num + 1;
|
|
}
|
|
return cp;
|
|
|
|
case REOP_BACKREF:
|
|
num = (uintN)pc[1];
|
|
parsub = &state->maybeParens[num];
|
|
matchlen = (ptrdiff_t)parsub->length;
|
|
matched = (cp + matchlen <= cpend &&
|
|
!memcmp(cp, parsub->chars, matchlen * sizeof(jschar)));
|
|
break;
|
|
|
|
/*
|
|
* See java.lang.String for more on why both toupper and tolower are needed, in
|
|
* comments for equalsIgnoreCase and regionMatches(boolean ignoreCase, ...).
|
|
*/
|
|
#define MATCH_CHARS_IGNORING_CASE(c, c2) \
|
|
((c) == (c2) || \
|
|
(c = JS_TOUPPER(c)) == (c2 = JS_TOUPPER(c2)) || \
|
|
JS_TOLOWER(c) == JS_TOLOWER(c2))
|
|
|
|
case REOP_BACKREFi:
|
|
num = (uintN)pc[1];
|
|
parsub = &state->maybeParens[num];
|
|
matchlen = (ptrdiff_t)parsub->length;
|
|
matched = (cp + matchlen <= cpend);
|
|
if (matched) {
|
|
for (i = 0; i < matchlen; i++) {
|
|
c = cp[i];
|
|
c2 = parsub->chars[i];
|
|
matched = MATCH_CHARS_IGNORING_CASE(c, c2);
|
|
if (!matched)
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
#define SINGLE_CASES \
|
|
case REOP_DOT: \
|
|
matched = (cp != cpend && *cp != '\n'); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
NONDOT_SINGLE_CASES \
|
|
/* END SINGLE_CASES */
|
|
|
|
#define NONDOT_SINGLE_CASES \
|
|
case REOP_CCLASS: \
|
|
case REOP_NCCLASS: \
|
|
c = *cp; \
|
|
if (c >= CCLASS_CHARSET_SIZE) { \
|
|
matched = (op == REOP_NCCLASS); \
|
|
} else { \
|
|
byte = (uintN)c >> 3; \
|
|
bit = c & 7; \
|
|
bit = 1 << bit; \
|
|
matched = pc[1 + byte] & bit; \
|
|
} \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_DIGIT: \
|
|
matched = JS_ISDIGIT(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_NONDIGIT: \
|
|
matched = !JS_ISDIGIT(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_ALNUM: \
|
|
matched = JS_ISWORD(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_NONALNUM: \
|
|
matched = !JS_ISWORD(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_SPACE: \
|
|
matched = JS_ISSPACE(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_NONSPACE: \
|
|
matched = !JS_ISSPACE(*cp); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_FLAT1: \
|
|
c = *cp; \
|
|
c2 = (jschar)pc[1]; \
|
|
matched = (c == c2); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_FLAT1i: \
|
|
c = *cp; \
|
|
c2 = (jschar)pc[1]; \
|
|
matched = MATCH_CHARS_IGNORING_CASE(c, c2); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_UCFLAT1: \
|
|
c = *cp; \
|
|
c2 = ((pc[1] << 8) | pc[2]); \
|
|
matched = (c == c2); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_UCFLAT1i: \
|
|
c = *cp; \
|
|
c2 = ((pc[1] << 8) | pc[2]); \
|
|
matched = MATCH_CHARS_IGNORING_CASE(c, c2); \
|
|
matchlen = 1; \
|
|
break; \
|
|
\
|
|
case REOP_UCCLASS: \
|
|
case REOP_NUCCLASS: \
|
|
size = (pc[1] << 8) | pc[2]; \
|
|
oplen += size; \
|
|
c = *cp; \
|
|
byte = (uintN)c >> 3; \
|
|
if (byte >= size) { \
|
|
matched = (op == REOP_NUCCLASS); \
|
|
} else { \
|
|
bit = c & 7; \
|
|
bit = 1 << bit; \
|
|
matched = pc[3 + byte] & bit; \
|
|
} \
|
|
matchlen = 1; \
|
|
break; \
|
|
/* END NONDOT_SINGLE_CASES */
|
|
|
|
/*
|
|
* Macro-expand single-char/single-opcode cases here and below.
|
|
*/
|
|
SINGLE_CASES
|
|
|
|
case REOP_STAR:
|
|
op = (REOp) *++pc;
|
|
oplen = reopsize[op];
|
|
for (cp2 = cp; cp < cpend; cp++) {
|
|
switch (op) {
|
|
NONDOT_SINGLE_CASES
|
|
default:
|
|
PR_ASSERT(0);
|
|
}
|
|
if (!matched)
|
|
break;
|
|
}
|
|
|
|
backtracker:
|
|
pc += oplen;
|
|
do {
|
|
cp3 = MatchRegExp(state, pc, cp);
|
|
if (cp3)
|
|
return cp3;
|
|
} while (--cp >= cp2);
|
|
return NULL;
|
|
|
|
case REOP_PLUS:
|
|
op = (REOp) *++pc;
|
|
oplen = reopsize[op];
|
|
for (cp2 = cp; cp < cpend; cp++) {
|
|
switch (op) {
|
|
SINGLE_CASES
|
|
default:
|
|
PR_ASSERT(0);
|
|
}
|
|
if (!matched)
|
|
break;
|
|
}
|
|
if (cp == cp2) {
|
|
/* Did not match once, hope for an alternative. */
|
|
return NULL;
|
|
}
|
|
/* Matched one or more times, try rest of regexp. */
|
|
cp2++;
|
|
goto backtracker;
|
|
|
|
case REOP_OPT:
|
|
op = (REOp) *++pc;
|
|
oplen = reopsize[op];
|
|
switch (op) {
|
|
SINGLE_CASES
|
|
default:
|
|
PR_ASSERT(0);
|
|
}
|
|
pc += oplen;
|
|
if (matched) {
|
|
cp2 = MatchRegExp(state, pc, cp + 1);
|
|
if (cp2)
|
|
return cp2;
|
|
}
|
|
continue;
|
|
|
|
case REOP_FLAT:
|
|
matchlen = (ptrdiff_t)pc[1];
|
|
oplen += matchlen;
|
|
matched = (cp + matchlen <= cpend);
|
|
if (matched) {
|
|
pc2 = pc + 2;
|
|
for (i = 0; i < matchlen; i++) {
|
|
matched = (cp[i] == (jschar)pc2[i]);
|
|
if (!matched)
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REOP_FLATi:
|
|
matchlen = (ptrdiff_t)pc[1];
|
|
oplen += matchlen;
|
|
matched = (cp + matchlen <= cpend);
|
|
if (matched) {
|
|
pc2 = pc + 2;
|
|
for (i = 0; i < matchlen; i++) {
|
|
c = cp[i];
|
|
c2 = (jschar)pc2[i];
|
|
matched = MATCH_CHARS_IGNORING_CASE(c, c2);
|
|
if (!matched)
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REOP_UCFLAT:
|
|
matchlen = (ptrdiff_t)pc[1];
|
|
oplen += 2 * matchlen;
|
|
matched = (cp + matchlen <= cpend &&
|
|
!memcmp(cp, pc + 2, matchlen * sizeof(jschar)));
|
|
break;
|
|
|
|
case REOP_UCFLATi:
|
|
matchlen = (ptrdiff_t)pc[1];
|
|
oplen += matchlen;
|
|
matched = (cp + matchlen <= cpend);
|
|
if (matched) {
|
|
pc2 = pc + 2;
|
|
for (i = 0; i < matchlen; i++) {
|
|
c = cp[i];
|
|
#if IS_BIG_ENDIAN
|
|
c2 = *pc2++ << 8;
|
|
c2 |= *pc2++;
|
|
#endif
|
|
#if IS_LITTLE_ENDIAN
|
|
c2 = *pc2++;
|
|
c2 |= *pc2++ << 8;
|
|
#endif
|
|
matched = MATCH_CHARS_IGNORING_CASE(c, c2);
|
|
if (!matched)
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case REOP_JUMP:
|
|
oplen = GET_JUMP_OFFSET(pc);
|
|
pc += oplen;
|
|
continue;
|
|
|
|
case REOP_DOTSTAR:
|
|
for (cp2 = cp; cp2 < cpend; cp2++)
|
|
if (*cp2 == '\n')
|
|
break;
|
|
for (pc2 = pc + oplen; cp2 >= cp; cp2--) {
|
|
cp3 = MatchRegExp(state, pc2, cp2);
|
|
if (cp3)
|
|
return cp3;
|
|
}
|
|
return NULL;
|
|
|
|
case REOP_ANCHOR:
|
|
pc2 = pc + oplen;
|
|
if (pc2 == pcend)
|
|
break;
|
|
for (cp2 = cp; cp2 < cpend; cp2++) {
|
|
cp3 = MatchRegExp(state, pc2, cp2);
|
|
if (cp3) {
|
|
state->skipped = cp2 - cp;
|
|
return cp3;
|
|
}
|
|
}
|
|
return NULL;
|
|
|
|
case REOP_ANCHOR1:
|
|
op = (REOp) *++pc;
|
|
oplen = reopsize[op];
|
|
pc2 = pc + oplen;
|
|
PR_ASSERT(pc2 < pcend);
|
|
for (cp2 = cp; cp < cpend; cp++) {
|
|
switch (op) {
|
|
NONDOT_SINGLE_CASES
|
|
default:
|
|
PR_ASSERT(0);
|
|
}
|
|
if (matched) {
|
|
cp3 = MatchRegExp(state, pc2, cp);
|
|
if (cp3) {
|
|
state->skipped = cp - cp2;
|
|
return cp3;
|
|
}
|
|
}
|
|
}
|
|
return NULL;
|
|
|
|
#undef MATCH_CHARS_IGNORING_CASE
|
|
#undef SINGLE_CASES
|
|
#undef NONDOT_SINGLE_CASES
|
|
|
|
default:
|
|
PR_ASSERT(0);
|
|
return NULL;
|
|
}
|
|
|
|
if (!matched)
|
|
return NULL;
|
|
pc += oplen;
|
|
if (matchlen) {
|
|
cp += matchlen;
|
|
if (cp > cpend)
|
|
cp = cpend;
|
|
}
|
|
}
|
|
|
|
return cp;
|
|
}
|
|
|
|
JSBool
|
|
js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
|
|
JSBool test, jsval *rval)
|
|
{
|
|
MatchState state;
|
|
jsbytecode *pc;
|
|
const jschar *cp, *ep;
|
|
size_t i, length, start;
|
|
void *mark;
|
|
JSSubString *parsub, *morepar;
|
|
JSBool ok;
|
|
JSRegExpStatics *res;
|
|
ptrdiff_t matchlen;
|
|
uintN num, morenum;
|
|
JSString *parstr, *matchstr;
|
|
JSObject *obj;
|
|
|
|
/*
|
|
* Initialize a state struct to minimize recursive argument traffic.
|
|
*/
|
|
state.context = cx;
|
|
state.anchoring = JS_FALSE;
|
|
pc = re->program;
|
|
state.pcend = pc + re->length;
|
|
|
|
/*
|
|
* It's safe to load from cp because JSStrings have a zero at the end,
|
|
* and we never let cp get beyond cpend.
|
|
*/
|
|
start = *indexp;
|
|
if (start > str->length)
|
|
start = str->length;
|
|
cp = str->chars + start;
|
|
state.cpbegin = str->chars;
|
|
state.cpend = str->chars + str->length;
|
|
state.start = start;
|
|
state.skipped = 0;
|
|
|
|
/*
|
|
* Use the temporary arena pool to grab space for parenthetical matches.
|
|
* After the PR_ARENA_ALLOCATE early return on error, goto out to be sure
|
|
* to free this memory.
|
|
*/
|
|
length = 2 * sizeof(JSSubString) * re->parenCount;
|
|
mark = PR_ARENA_MARK(&cx->tempPool);
|
|
PR_ARENA_ALLOCATE(parsub, &cx->tempPool, length);
|
|
if (!parsub) {
|
|
JS_ReportOutOfMemory(cx);
|
|
return JS_FALSE;
|
|
}
|
|
memset(parsub, 0, length);
|
|
state.parenCount = 0;
|
|
state.maybeParens = parsub;
|
|
state.parens = parsub + re->parenCount;
|
|
ok = JS_TRUE;
|
|
|
|
/*
|
|
* Call the recursive matcher to do the real work. Return null on mismatch
|
|
* whether testing or not. On match, return an extended Array object.
|
|
*/
|
|
cp = MatchRegExp(&state, pc, cp);
|
|
if (!cp) {
|
|
*rval = JSVAL_NULL;
|
|
goto out;
|
|
}
|
|
i = PTRDIFF(cp, state.cpbegin, jschar);
|
|
*indexp = i;
|
|
matchlen = i - (start + state.skipped);
|
|
ep = cp;
|
|
cp -= matchlen;
|
|
|
|
if (test) {
|
|
/*
|
|
* Testing for a match and updating cx->regExpStatics: don't allocate
|
|
* an array object, do return true.
|
|
*/
|
|
*rval = JSVAL_TRUE;
|
|
} else {
|
|
/*
|
|
* The array returned on match has element 0 bound to the matched
|
|
* string, elements 1 through state.parenCount bound to the paren
|
|
* matches, an index property telling the length of the left context,
|
|
* and an input property referring to the input string.
|
|
*/
|
|
obj = js_NewArrayObject(cx, 0, NULL);
|
|
if (!obj) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
*rval = OBJECT_TO_JSVAL(obj);
|
|
|
|
#define DEFVAL(val, id) { \
|
|
ok = js_DefineProperty(cx, obj, id, val, \
|
|
JS_PropertyStub, JS_PropertyStub, \
|
|
JSPROP_ENUMERATE, NULL); \
|
|
if (!ok) { \
|
|
cx->newborn[GCX_OBJECT] = NULL; \
|
|
cx->newborn[GCX_STRING] = NULL; \
|
|
goto out; \
|
|
} \
|
|
}
|
|
|
|
matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
|
|
if (!matchstr) {
|
|
cx->newborn[GCX_OBJECT] = NULL;
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSVAL(0));
|
|
}
|
|
|
|
res = &cx->regExpStatics;
|
|
PR_ASSERT(state.parenCount <= re->parenCount);
|
|
if (state.parenCount == 0) {
|
|
res->parenCount = 0;
|
|
res->lastParen = js_EmptySubString;
|
|
} else {
|
|
for (num = 0; num < state.parenCount; num++) {
|
|
parsub = &state.parens[num];
|
|
if (num < 9) {
|
|
res->parens[num] = *parsub;
|
|
} else {
|
|
morenum = num - 9;
|
|
morepar = res->moreParens;
|
|
if (!morepar) {
|
|
res->moreLength = 10;
|
|
morepar = JS_malloc(cx, 10 * sizeof(JSSubString));
|
|
} else if (morenum > res->moreLength) {
|
|
res->moreLength += 10;
|
|
morepar = JS_realloc(cx, morepar,
|
|
res->moreLength * sizeof(JSSubString));
|
|
}
|
|
if (!morepar) {
|
|
cx->newborn[GCX_OBJECT] = NULL;
|
|
cx->newborn[GCX_STRING] = NULL;
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
res->moreParens = morepar;
|
|
morepar[morenum] = *parsub;
|
|
}
|
|
if (test)
|
|
continue;
|
|
parstr = js_NewStringCopyN(cx, parsub->chars, parsub->length, 0);
|
|
if (!parstr) {
|
|
cx->newborn[GCX_OBJECT] = NULL;
|
|
cx->newborn[GCX_STRING] = NULL;
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
ok = js_DefineProperty(cx, obj, INT_TO_JSVAL(num + 1),
|
|
STRING_TO_JSVAL(parstr), NULL, NULL,
|
|
JSPROP_ENUMERATE, NULL);
|
|
if (!ok) {
|
|
cx->newborn[GCX_OBJECT] = NULL;
|
|
cx->newborn[GCX_STRING] = NULL;
|
|
goto out;
|
|
}
|
|
}
|
|
res->parenCount = num;
|
|
res->lastParen = *parsub;
|
|
}
|
|
|
|
if (!test) {
|
|
/*
|
|
* Define the index and input properties last for better for/in loop
|
|
* order (so they come after the elements).
|
|
*/
|
|
DEFVAL(INT_TO_JSVAL(start + state.skipped),
|
|
(jsid)cx->runtime->atomState.indexAtom);
|
|
DEFVAL(STRING_TO_JSVAL(str),
|
|
(jsid)cx->runtime->atomState.inputAtom);
|
|
}
|
|
|
|
#undef DEFVAL
|
|
|
|
res->lastMatch.chars = cp;
|
|
res->lastMatch.length = matchlen;
|
|
if (cx->version == JSVERSION_1_2) {
|
|
/*
|
|
* JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
|
|
* in scalar contexts, and unintentionally for the string.match "list"
|
|
* psuedo-context. On "hi there bye", the following would result:
|
|
*
|
|
* Language while(/ /g){print("$`");} s/ /$`/g
|
|
* perl4.036 "hi", "there" "hihitherehi therebye"
|
|
* perl5 "hi", "hi there" "hihitherehi therebye"
|
|
* js1.2 "hi", "there" "hihitheretherebye"
|
|
*/
|
|
res->leftContext.chars = str->chars + start;
|
|
res->leftContext.length = state.skipped;
|
|
} else {
|
|
/*
|
|
* For JS1.3 and ECMAv2, emulate Perl5 exactly:
|
|
*
|
|
* js1.3 "hi", "hi there" "hihitherehi therebye"
|
|
*/
|
|
res->leftContext.chars = str->chars;
|
|
res->leftContext.length = start + state.skipped;
|
|
}
|
|
res->rightContext.chars = ep;
|
|
res->rightContext.length = state.cpend - ep;
|
|
|
|
out:
|
|
PR_ARENA_RELEASE(&cx->tempPool, mark);
|
|
return ok;
|
|
}
|
|
|
|
/************************************************************************/
|
|
|
|
/*
 * Tiny ids for the per-instance RegExp properties declared in regexp_props.
 * regexp_getProperty and regexp_setProperty dispatch on these values.
 */
enum regexp_tinyid {
    REGEXP_SOURCE       = -1,   /* "source" */
    REGEXP_GLOBAL       = -2,   /* "global" */
    REGEXP_IGNORE_CASE  = -3,   /* "ignoreCase" */
    REGEXP_LAST_INDEX   = -4    /* "lastIndex" */
};
|
|
|
|
static JSPropertySpec regexp_props[] = {
|
|
{"source", REGEXP_SOURCE, JSPROP_ENUMERATE | JSPROP_READONLY},
|
|
{"global", REGEXP_GLOBAL, JSPROP_ENUMERATE | JSPROP_READONLY},
|
|
{"ignoreCase", REGEXP_IGNORE_CASE, JSPROP_ENUMERATE | JSPROP_READONLY},
|
|
{"lastIndex", REGEXP_LAST_INDEX, JSPROP_ENUMERATE},
|
|
{0}
|
|
};
|
|
|
|
static JSBool
|
|
regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
|
|
{
|
|
jsint slot;
|
|
JSRegExp *re;
|
|
|
|
if (!JSVAL_IS_INT(id))
|
|
return JS_TRUE;
|
|
slot = JSVAL_TO_INT(id);
|
|
JS_LOCK_OBJ(cx, obj);
|
|
re = JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
|
|
if (re) {
|
|
switch (slot) {
|
|
case REGEXP_SOURCE:
|
|
*vp = STRING_TO_JSVAL(re->source);
|
|
break;
|
|
case REGEXP_GLOBAL:
|
|
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
|
|
break;
|
|
case REGEXP_IGNORE_CASE:
|
|
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
|
|
break;
|
|
case REGEXP_LAST_INDEX:
|
|
*vp = INT_TO_JSVAL((jsint)re->lastIndex);
|
|
break;
|
|
}
|
|
}
|
|
JS_UNLOCK_OBJ(cx, obj);
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSBool
|
|
regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
|
|
{
|
|
jsint slot;
|
|
JSRegExp *re;
|
|
jsdouble d;
|
|
|
|
if (!JSVAL_IS_INT(id))
|
|
return JS_TRUE;
|
|
slot = JSVAL_TO_INT(id);
|
|
JS_LOCK_OBJ(cx, obj);
|
|
re = JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
|
|
if (re && slot == REGEXP_LAST_INDEX) {
|
|
if (!js_ValueToNumber(cx, *vp, &d))
|
|
return JS_FALSE;
|
|
re->lastIndex = (size_t)d;
|
|
}
|
|
JS_UNLOCK_OBJ(cx, obj);
|
|
return JS_TRUE;
|
|
}
|
|
|
|
/*
|
|
* RegExp class static properties and their Perl counterparts:
|
|
*
|
|
* RegExp.input $_
|
|
* RegExp.multiline $*
|
|
* RegExp.lastMatch $&
|
|
* RegExp.lastParen $+
|
|
* RegExp.leftContext $`
|
|
* RegExp.rightContext $'
|
|
*/
|
|
/*
 * Tiny ids for the named static properties of the RegExp constructor.
 * regexp_static_props uses the non-negative ids 0 through 8 for $1 through
 * $9, so the named statics take negative values.
 */
enum regexp_static_tinyid {
    REGEXP_STATIC_INPUT         = -1,   /* RegExp.input */
    REGEXP_STATIC_MULTILINE     = -2,   /* RegExp.multiline */
    REGEXP_STATIC_LAST_MATCH    = -3,   /* RegExp.lastMatch */
    REGEXP_STATIC_LAST_PAREN    = -4,   /* RegExp.lastParen */
    REGEXP_STATIC_LEFT_CONTEXT  = -5,   /* RegExp.leftContext */
    REGEXP_STATIC_RIGHT_CONTEXT = -6    /* RegExp.rightContext */
};
|
|
|
|
JSBool
|
|
js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
|
|
{
|
|
JS_ClearRegExpStatics(cx);
|
|
return js_AddRoot(cx, &res->input, "res->input");
|
|
}
|
|
|
|
void
|
|
js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
|
|
{
|
|
if (res->moreParens) {
|
|
JS_free(cx, res->moreParens);
|
|
res->moreParens = NULL;
|
|
}
|
|
js_RemoveRoot(cx, &res->input);
|
|
}
|
|
|
|
static JSBool
|
|
regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
|
|
{
|
|
jsint slot;
|
|
JSRegExpStatics *res;
|
|
JSString *str;
|
|
JSSubString *sub;
|
|
|
|
res = &cx->regExpStatics;
|
|
if (!JSVAL_IS_INT(id))
|
|
return JS_TRUE;
|
|
slot = JSVAL_TO_INT(id);
|
|
switch (slot) {
|
|
case REGEXP_STATIC_INPUT:
|
|
*vp = res->input ? STRING_TO_JSVAL(res->input)
|
|
: JS_GetEmptyStringValue(cx);
|
|
return JS_TRUE;
|
|
case REGEXP_STATIC_MULTILINE:
|
|
*vp = BOOLEAN_TO_JSVAL(res->multiline);
|
|
return JS_TRUE;
|
|
case REGEXP_STATIC_LAST_MATCH:
|
|
sub = &res->lastMatch;
|
|
break;
|
|
case REGEXP_STATIC_LAST_PAREN:
|
|
sub = &res->lastParen;
|
|
break;
|
|
case REGEXP_STATIC_LEFT_CONTEXT:
|
|
sub = &res->leftContext;
|
|
break;
|
|
case REGEXP_STATIC_RIGHT_CONTEXT:
|
|
sub = &res->rightContext;
|
|
break;
|
|
default:
|
|
sub = REGEXP_PAREN_SUBSTRING(res, slot);
|
|
break;
|
|
}
|
|
str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
|
|
if (!str)
|
|
return JS_FALSE;
|
|
*vp = STRING_TO_JSVAL(str);
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSBool
|
|
regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
|
|
{
|
|
JSRegExpStatics *res;
|
|
|
|
if (!JSVAL_IS_INT(id))
|
|
return JS_TRUE;
|
|
res = &cx->regExpStatics;
|
|
/* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
|
|
if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
|
|
if (!JSVAL_IS_STRING(*vp) &&
|
|
!JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
|
|
return JS_FALSE;
|
|
}
|
|
res->input = JSVAL_TO_STRING(*vp);
|
|
} else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
|
|
if (!JSVAL_IS_BOOLEAN(*vp) &&
|
|
!JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
|
|
return JS_FALSE;
|
|
}
|
|
res->multiline = JSVAL_TO_BOOLEAN(*vp);
|
|
}
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSPropertySpec regexp_static_props[] = {
|
|
{"input",
|
|
REGEXP_STATIC_INPUT, JSPROP_ENUMERATE,
|
|
regexp_static_getProperty, regexp_static_setProperty},
|
|
{"multiline",
|
|
REGEXP_STATIC_MULTILINE, JSPROP_ENUMERATE,
|
|
regexp_static_getProperty, regexp_static_setProperty},
|
|
{"lastMatch",
|
|
REGEXP_STATIC_LAST_MATCH, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"lastParen",
|
|
REGEXP_STATIC_LAST_PAREN, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"leftContext",
|
|
REGEXP_STATIC_LEFT_CONTEXT, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"rightContext",
|
|
REGEXP_STATIC_RIGHT_CONTEXT, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
|
|
/* XXX should have block scope and local $1, etc. */
|
|
{"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
{"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY,
|
|
regexp_static_getProperty, regexp_static_getProperty},
|
|
|
|
{0}
|
|
};
|
|
|
|
static void
|
|
regexp_finalize(JSContext *cx, JSObject *obj)
|
|
{
|
|
JSRegExp *re;
|
|
|
|
re = JS_GetPrivate(cx, obj);
|
|
if (!re)
|
|
return;
|
|
js_DestroyRegExp(cx, re);
|
|
}
|
|
|
|
/* Forward static prototype. */
|
|
static JSBool
|
|
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|
jsval *rval);
|
|
|
|
static JSBool
|
|
regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|
{
|
|
return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
|
|
}
|
|
|
|
#if JS_HAS_XDR
|
|
|
|
#include "jsxdrapi.h"
|
|
|
|
static JSBool
|
|
regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
|
|
{
|
|
JSRegExp *re;
|
|
JSString *source;
|
|
uint8 flags;
|
|
|
|
if (xdr->mode == JSXDR_ENCODE) {
|
|
re = JS_GetPrivate(xdr->cx, *objp);
|
|
if (!re)
|
|
return JS_FALSE;
|
|
source = re->source;
|
|
flags = re->flags;
|
|
}
|
|
if (!JS_XDRString(xdr, &source) ||
|
|
!JS_XDRUint8(xdr, &flags)) {
|
|
return JS_FALSE;
|
|
}
|
|
if (xdr->mode == JSXDR_DECODE) {
|
|
*objp = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
|
|
if (!*objp)
|
|
return JS_FALSE;
|
|
re = js_NewRegExp(xdr->cx, source, flags);
|
|
if (!re)
|
|
return JS_FALSE;
|
|
if (!JS_SetPrivate(xdr->cx, *objp, re)) {
|
|
js_DestroyRegExp(xdr->cx, re);
|
|
return JS_FALSE;
|
|
}
|
|
}
|
|
return JS_TRUE;
|
|
}
|
|
|
|
#else /* !JS_HAS_XDR */
|
|
|
|
#define regexp_xdrObject NULL
|
|
|
|
#endif /* !JS_HAS_XDR */
|
|
|
|
JSClass js_RegExpClass = {
|
|
"RegExp",
|
|
JSCLASS_HAS_PRIVATE,
|
|
JS_PropertyStub, JS_PropertyStub, regexp_getProperty, regexp_setProperty,
|
|
JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, regexp_finalize,
|
|
NULL, NULL, regexp_call, NULL,
|
|
regexp_xdrObject,
|
|
};
|
|
|
|
static JSBool
|
|
regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|
jsval *rval)
|
|
{
|
|
JSBool ok;
|
|
JSRegExp *re;
|
|
jschar *chars;
|
|
size_t length, nflags;
|
|
uintN flags;
|
|
JSString *str;
|
|
|
|
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
|
|
return JS_FALSE;
|
|
ok = JS_TRUE;
|
|
JS_LOCK_OBJ(cx, obj);
|
|
re = JS_GetPrivate(cx, obj);
|
|
if (!re) {
|
|
*rval = STRING_TO_JSVAL(cx->runtime->emptyString);
|
|
goto out;
|
|
}
|
|
|
|
length = re->source->length + 2;
|
|
nflags = 0;
|
|
for (flags = re->flags; flags != 0; flags &= flags - 1)
|
|
nflags++;
|
|
chars = JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
|
|
if (!chars) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
|
|
chars[0] = '/';
|
|
js_strncpy(&chars[1], re->source->chars, length - 2);
|
|
chars[length-1] = '/';
|
|
if (nflags) {
|
|
if (re->flags & JSREG_GLOB)
|
|
chars[length++] = 'g';
|
|
if (re->flags & JSREG_FOLD)
|
|
chars[length++] = 'i';
|
|
}
|
|
chars[length] = 0;
|
|
|
|
str = js_NewString(cx, chars, length, 0);
|
|
if (!str) {
|
|
JS_free(cx, chars);
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
*rval = STRING_TO_JSVAL(str);
|
|
out:
|
|
JS_UNLOCK_OBJ(cx, obj);
|
|
return ok;
|
|
}
|
|
|
|
static JSBool
|
|
regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|
jsval *rval)
|
|
{
|
|
JSString *opt, *str;
|
|
JSRegExp *oldre, *re;
|
|
JSBool ok;
|
|
|
|
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
|
|
return JS_FALSE;
|
|
opt = NULL;
|
|
JS_LOCK_OBJ(cx, obj);
|
|
if (argc == 0) {
|
|
str = cx->runtime->emptyString;
|
|
} else {
|
|
str = js_ValueToString(cx, argv[0]);
|
|
if (!str) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
argv[0] = STRING_TO_JSVAL(str);
|
|
if (argc > 1) {
|
|
opt = js_ValueToString(cx, argv[1]);
|
|
if (!opt) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
argv[1] = STRING_TO_JSVAL(opt);
|
|
}
|
|
}
|
|
re = js_NewRegExpOpt(cx, str, opt);
|
|
if (!re) {
|
|
ok = JS_FALSE;
|
|
goto out;
|
|
}
|
|
oldre = JS_GetPrivate(cx, obj);
|
|
ok = JS_SetPrivate(cx, obj, re);
|
|
if (!ok) {
|
|
js_DestroyRegExp(cx, re);
|
|
goto out;
|
|
}
|
|
if (oldre)
|
|
js_DestroyRegExp(cx, oldre);
|
|
*rval = OBJECT_TO_JSVAL(obj);
|
|
out:
|
|
JS_UNLOCK_OBJ(cx, obj);
|
|
return ok;
|
|
}
|
|
|
|
static JSBool
|
|
regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
|
|
JSBool test, jsval *rval)
|
|
{
|
|
JSBool ok, locked;
|
|
JSRegExp *re;
|
|
JSString *str;
|
|
size_t i;
|
|
|
|
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
|
|
return JS_FALSE;
|
|
re = JS_GetPrivate(cx, obj);
|
|
if (!re)
|
|
return JS_TRUE;
|
|
ok = locked = JS_FALSE;
|
|
if (argc == 0) {
|
|
str = cx->regExpStatics.input;
|
|
if (!str) {
|
|
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
|
|
JSMSG_NO_INPUT,
|
|
JS_GetStringBytes(re->source),
|
|
(re->flags & JSREG_GLOB) ? "g" : "",
|
|
(re->flags & JSREG_FOLD) ? "i" : "");
|
|
goto out;
|
|
}
|
|
} else {
|
|
str = js_ValueToString(cx, argv[0]);
|
|
if (!str)
|
|
goto out;
|
|
argv[0] = STRING_TO_JSVAL(str);
|
|
}
|
|
if (re->flags & JSREG_GLOB) {
|
|
JS_LOCK_OBJ(cx, obj);
|
|
locked = JS_TRUE;
|
|
i = re->lastIndex;
|
|
} else {
|
|
i = 0;
|
|
}
|
|
ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
|
|
if (re->flags & JSREG_GLOB)
|
|
re->lastIndex = (*rval == JSVAL_NULL) ? 0 : i;
|
|
out:
|
|
if (locked)
|
|
JS_UNLOCK_OBJ(cx, obj);
|
|
return ok;
|
|
}
|
|
|
|
static JSBool
|
|
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|
{
|
|
return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
|
|
}
|
|
|
|
static JSBool
|
|
regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|
{
|
|
if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
|
|
return JS_FALSE;
|
|
if (*rval != JSVAL_TRUE)
|
|
*rval = JSVAL_FALSE;
|
|
return JS_TRUE;
|
|
}
|
|
|
|
static JSFunctionSpec regexp_methods[] = {
|
|
#if JS_HAS_TOSOURCE
|
|
{js_toSource_str, regexp_toString, 0},
|
|
#endif
|
|
{js_toString_str, regexp_toString, 0},
|
|
{"compile", regexp_compile, 1},
|
|
{"exec", regexp_exec, 0},
|
|
{"test", regexp_test, 0},
|
|
{0}
|
|
};
|
|
|
|
static JSBool
|
|
RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
|
|
{
|
|
/* If not constructing, replace obj with a new RegExp object. */
|
|
if (!cx->fp->constructing) {
|
|
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
|
|
if (!obj)
|
|
return JS_FALSE;
|
|
}
|
|
return regexp_compile(cx, obj, argc, argv, rval);
|
|
}
|
|
|
|
JSObject *
|
|
js_InitRegExpClass(JSContext *cx, JSObject *obj)
|
|
{
|
|
JSObject *proto, *ctor;
|
|
|
|
proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
|
|
regexp_props, regexp_methods,
|
|
regexp_static_props, NULL);
|
|
|
|
if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
|
|
return NULL;
|
|
if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
|
|
!JS_AliasProperty(cx, ctor, "multiline", "$*") ||
|
|
!JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
|
|
!JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
|
|
!JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
|
|
!JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
|
|
goto bad;
|
|
}
|
|
return proto;
|
|
|
|
bad:
|
|
JS_DeleteProperty(cx, obj, js_RegExpClass.name);
|
|
return NULL;
|
|
}
|
|
|
|
JSObject *
|
|
js_NewRegExpObject(JSContext *cx, jschar *chars, size_t length, uintN flags)
|
|
{
|
|
JSString *str;
|
|
JSObject *obj;
|
|
JSRegExp *re;
|
|
|
|
str = js_NewStringCopyN(cx, chars, length, 0);
|
|
if (!str)
|
|
return NULL;
|
|
re = js_NewRegExp(cx, str, flags);
|
|
if (!re)
|
|
return NULL;
|
|
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
|
|
if (!obj || !JS_SetPrivate(cx, obj, re)) {
|
|
js_DestroyRegExp(cx, re);
|
|
return NULL;
|
|
}
|
|
return obj;
|
|
}
|
|
|
|
#endif /* JS_HAS_REGEXPS */
|