Remove the ALG_USERETRY (mis)feature.

This commit is contained in:
Shmuel Zeigerman
2014-08-25 16:49:24 +03:00
parent d8488e739a
commit 150c251be5
5 changed files with 21 additions and 124 deletions
+8 -93
View File
@@ -9,6 +9,8 @@
/* Prototypes of file-local (static) functions. Several of them are called by
 * the code further down (e.g. gsub_exec, gmatch_exec, gmatch_pushsubject,
 * compile_regex). */
static void gmatch_pushsubject (lua_State *L, TArgExec *argE);
static int findmatch_exec (TUserdata *ud, TArgExec *argE);
static int split_exec (TUserdata *ud, TArgExec *argE, int offset);
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
static int gmatch_exec (TUserdata *ud, TArgExec *argE);
static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud);
static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
@@ -34,33 +36,6 @@ static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
#define DO_NAMED_SUBPATTERNS(a,b,c)
#endif
/* During an iterative search, a zero-length match can occur at the current
 * position, which prevents further advance on the subject string.
 * There are two ways to handle that (AFAIK):
 * a) Advance by one character (continue the search from the next position),
 * or
 * b) Search for a non-zero-length match that begins at the current
 * position ("retry" the search). If no such match is found, then advance
 * by one character.
 * Option "b)" seems more correct, but most regex libraries expose no API for
 * it. The known exception is PCRE, which has the flags PCRE_NOTEMPTY and
 * PCRE_ANCHORED.
 */
#ifdef ALG_USERETRY
/* Retry enabled: SET_RETRY assigns b to a, and the exec functions take an
 * extra `retry` argument. */
#define SET_RETRY(a,b) (a=b)
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry);
static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry);
#define GSUB_EXEC gsub_exec
#define GMATCH_EXEC gmatch_exec
#else
/* Retry disabled: SET_RETRY ignores b, and the GSUB_EXEC / GMATCH_EXEC
 * wrappers drop their last (retry) argument before calling the functions. */
#define SET_RETRY(a,b) ((void)a)
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
static int gmatch_exec (TUserdata *ud, TArgExec *argE);
#define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c)
#define GMATCH_EXEC(a,b,c) gmatch_exec(a,b)
#endif
#define METHOD_FIND 0
#define METHOD_MATCH 1
#define METHOD_EXEC 2
@@ -253,7 +228,7 @@ static int algf_gsub (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
int n_match = 0, n_subst = 0, st = 0, retry;
int n_match = 0, n_subst = 0, st = 0;
TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
TFreeList freelist;
/*------------------------------------------------------------------*/
@@ -276,22 +251,11 @@ static int algf_gsub (lua_State *L) {
}
/*------------------------------------------------------------------*/
buffer_init (&BufOut, 1024, L, &freelist);
SET_RETRY (retry, 0);
while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) {
int from, to, res;
int curr_subst = 0;
res = GSUB_EXEC (ud, &argE, st, retry);
res = gsub_exec (ud, &argE, st);
if (ALG_NOMATCH (res)) {
#ifdef ALG_USERETRY
if (retry) {
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
st += ALG_CHARSIZE;
retry = 0;
continue;
}
}
#endif
break;
}
else if (!ALG_ISMATCH (res)) {
@@ -406,16 +370,11 @@ static int algf_gsub (lua_State *L) {
n_subst += curr_subst;
if (st < to) {
st = to;
SET_RETRY (retry, 0);
}
else if (st < (int)argE.textlen) {
#ifdef ALG_USERETRY
retry = 1;
#else
/* advance by 1 char (not replaced) */
buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
st += ALG_CHARSIZE;
#endif
}
else break;
}
@@ -433,7 +392,7 @@ static int algf_count (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
int n_match = 0, st = 0, retry;
int n_match = 0, st = 0;
/*------------------------------------------------------------------*/
checkarg_count (L, &argC, &argE);
if (argC.ud) {
@@ -442,20 +401,10 @@ static int algf_count (lua_State *L) {
}
else compile_regex (L, &argC, &ud);
/*------------------------------------------------------------------*/
SET_RETRY (retry, 0);
while (st <= (int)argE.textlen) {
int to, res;
res = GSUB_EXEC (ud, &argE, st, retry);
res = gsub_exec (ud, &argE, st);
if (ALG_NOMATCH (res)) {
#ifdef ALG_USERETRY
if (retry) {
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
st += ALG_CHARSIZE;
retry = 0;
continue;
}
}
#endif
break;
}
else if (!ALG_ISMATCH (res)) {
@@ -473,15 +422,10 @@ static int algf_count (lua_State *L) {
/*----------------------------------------------------------------*/
if (st < to) {
st = to;
SET_RETRY (retry, 0);
}
else if (st < (int)argE.textlen) {
#ifdef ALG_USERETRY
retry = 1;
#else
/* advance by 1 char (not replaced) */
st += ALG_CHARSIZE;
#endif
}
else break;
}
@@ -543,39 +487,24 @@ static int algf_match (lua_State *L) {
static int gmatch_iter (lua_State *L) {
int retry;
TArgExec argE;
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
#ifdef ALG_USERETRY
retry = lua_tointeger (L, lua_upvalueindex (5));
#endif
if (argE.startoffset > (int)argE.textlen)
return 0;
while (1) {
int res = GMATCH_EXEC (ud, &argE, retry);
int res = gmatch_exec (ud, &argE);
if (ALG_ISMATCH (res)) {
int incr = 0;
if (ALG_SUBLEN(ud,0)) {
SET_RETRY (retry, 0);
}
else { /* no progress: prevent endless loop */
#ifdef ALG_USERETRY
SET_RETRY (retry, 1);
#else
if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
incr = ALG_CHARSIZE;
#endif
}
lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
#ifdef ALG_USERETRY
lua_pushinteger (L, retry);
lua_replace (L, lua_upvalueindex (5)); /* update retry */
#endif
/* push either captures or entire match */
if (ALG_NSUB(ud)) {
push_substrings (L, ud, argE.text, NULL);
@@ -587,15 +516,6 @@ static int gmatch_iter (lua_State *L) {
}
}
else if (ALG_NOMATCH (res)) {
#ifdef ALG_USERETRY
if (retry) {
if (argE.startoffset < (int)argE.textlen) {
++argE.startoffset; /* advance by 1 char */
SET_RETRY (retry, 0);
continue;
}
}
#endif
return 0;
}
else
@@ -665,12 +585,7 @@ static int algf_gmatch (lua_State *L)
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
#ifdef ALG_USERETRY
lua_pushinteger (L, 0); /* 5-th upvalue: retry */
lua_pushcclosure (L, gmatch_iter, 5);
#else
lua_pushcclosure (L, gmatch_iter, 4);
#endif
return 1;
}
-1
View File
@@ -55,7 +55,6 @@ static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
#define ALG_BASE(st) 0
#define ALG_PULL
/* #define ALG_USERETRY */
typedef struct {
regex_t *reg;
+8 -25
View File
@@ -57,7 +57,6 @@ static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
#define ALG_BASE(st) 0
#define ALG_PULL
#define ALG_USERETRY
typedef struct {
pcre * pr;
@@ -296,18 +295,10 @@ static int Lpcre_dfa_exec (lua_State *L)
}
#endif /* #if PCRE_MAJOR >= 6 */
#ifdef ALG_USERETRY
/* Retry-aware variant: when `retry` is non-zero, PCRE_NOTEMPTY and
 * PCRE_ANCHORED are OR-ed into the caller's exec flags; otherwise the flags
 * are passed through unchanged. The search starts at argE->startoffset and
 * the pcre_exec result is returned as-is. */
static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) {
int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
argE->startoffset, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
}
#else
/* Plain variant: calls pcre_exec from argE->startoffset with the caller's
 * exec flags and returns its result as-is. */
static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
}
#endif
/* Execute the compiled pattern held in `ud` against the subject described by
 * `argE`, starting at argE->startoffset and using argE->eflags as the exec
 * options. Match offsets are written to ud->match; the pcre_exec result is
 * returned unchanged. */
static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
  /* Size of the offsets vector handed to pcre_exec: three ints per
   * ALG_NSUB(ud) + 1 entries. */
  const int ovecsize = 3 * (ALG_NSUB(ud) + 1);

  return pcre_exec (ud->pr, ud->extra,
                    argE->text, argE->textlen, argE->startoffset,
                    argE->eflags,
                    ud->match, ovecsize);
}
static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
lua_pushlstring (L, argE->text, argE->textlen);
@@ -318,18 +309,10 @@ static int findmatch_exec (TPcre *ud, TArgExec *argE) {
argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
}
#ifdef ALG_USERETRY
/* Retry-aware variant: when `retry` is non-zero, PCRE_NOTEMPTY and
 * PCRE_ANCHORED are OR-ed into the caller's exec flags; otherwise the flags
 * are passed through unchanged. The search starts at offset `st` and the
 * pcre_exec result is returned as-is. */
static int gsub_exec (TPcre *ud, TArgExec *argE, int st, int retry) {
int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
st, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
}
#else
/* Plain variant: calls pcre_exec from offset `st` with the caller's exec
 * flags and returns its result as-is. */
static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
}
#endif
/* Execute the compiled pattern held in `ud` against the subject described by
 * `argE`, starting the search at offset `st` and using argE->eflags as the
 * exec options. Match offsets are written to ud->match; the pcre_exec result
 * is returned unchanged. */
static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
  /* Size of the offsets vector handed to pcre_exec: three ints per
   * ALG_NSUB(ud) + 1 entries. */
  const int ovecsize = 3 * (ALG_NSUB(ud) + 1);

  return pcre_exec (ud->pr, ud->extra,
                    argE->text, argE->textlen, st,
                    argE->eflags,
                    ud->match, ovecsize);
}
static int split_exec (TPcre *ud, TArgExec *argE, int offset) {
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset,
+2 -2
View File
@@ -78,9 +78,9 @@ local function set_f_gmatch (lib, flg)
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
{ {"", pCSV}, {{"",F}} },
{ {"12", pCSV}, {{"12",F}} },
{ {",", pCSV}, {{"", F},{F,""}} },
{ {",", pCSV}, {{"", F}} },
{ {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
{ {",,12,45,,ab,", pCSV}, {{"",F},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
}
end
+3 -3
View File
@@ -27,7 +27,7 @@ local function set_f_gsub4 (lib, flg)
Func = get_gsub (lib),
--{ s, p, f, n, res1, res2, res3 },
{ {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} },
{ {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .-
{ {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .-
{ {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} },
{ {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} },
{ {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d
@@ -37,9 +37,9 @@ local function set_f_gsub4 (lib, flg)
{ {"abcd", "\\b", "%1"}, {"abcd", 2, 2} },
{ {"", pCSV,fCSV}, {"[]", 1, 1} },
{ {"123", pCSV,fCSV}, {"[123]", 1, 1} },
{ {",", pCSV,fCSV}, {"[][]", 2, 2} },
{ {",", pCSV,fCSV}, {"[],", 1, 1} },
{ {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}},
{ {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}},
{ {",,123,456,,abc,789,", pCSV,fCSV}, {"[],[123][456][][abc][789][]", 7, 7}},
}
-- convert patterns: lua -> pcre
for _, test in ipairs (set) do