mirror of
https://github.com/vxcontrol/lualibs-lrexlib.git
synced 2026-07-01 09:25:08 -04:00
Remove the ALG_USERETRY (mis)feature.
This commit is contained in:
+8
-93
@@ -9,6 +9,8 @@
|
||||
static void gmatch_pushsubject (lua_State *L, TArgExec *argE);
|
||||
static int findmatch_exec (TUserdata *ud, TArgExec *argE);
|
||||
static int split_exec (TUserdata *ud, TArgExec *argE, int offset);
|
||||
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE);
|
||||
static int compile_regex (lua_State *L, const TArgComp *argC, TUserdata **pud);
|
||||
static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
|
||||
|
||||
@@ -34,33 +36,6 @@ static int generate_error (lua_State *L, const TUserdata *ud, int errcode);
|
||||
#define DO_NAMED_SUBPATTERNS(a,b,c)
|
||||
#endif
|
||||
|
||||
/* When doing an iterative search, there can occur a situation of a zero-length
|
||||
* match at the current position, that prevents further advance on the subject
|
||||
* string.
|
||||
* There are two ways to handle that (AFAIK):
|
||||
* a) Advance by one character (continue the search from the next position),
|
||||
* or
|
||||
* b) Search for a non-zero-length match that begins from the current
|
||||
* position ("retry" the search). If the match is not found then advance
|
||||
* by one character.
|
||||
* The "b)" seems more correct, but most regex libraries expose no API for that.
|
||||
* The known exception is PCRE that has flags PCRE_NOTEMPTY and PCRE_ANCHORED.
|
||||
*/
|
||||
#ifdef ALG_USERETRY
|
||||
#define SET_RETRY(a,b) (a=b)
|
||||
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset, int retry);
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry);
|
||||
#define GSUB_EXEC gsub_exec
|
||||
#define GMATCH_EXEC gmatch_exec
|
||||
#else
|
||||
#define SET_RETRY(a,b) ((void)a)
|
||||
static int gsub_exec (TUserdata *ud, TArgExec *argE, int offset);
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE);
|
||||
#define GSUB_EXEC(a,b,c,d) gsub_exec(a,b,c)
|
||||
#define GMATCH_EXEC(a,b,c) gmatch_exec(a,b)
|
||||
#endif
|
||||
|
||||
|
||||
#define METHOD_FIND 0
|
||||
#define METHOD_MATCH 1
|
||||
#define METHOD_EXEC 2
|
||||
@@ -253,7 +228,7 @@ static int algf_gsub (lua_State *L) {
|
||||
TUserdata *ud;
|
||||
TArgComp argC;
|
||||
TArgExec argE;
|
||||
int n_match = 0, n_subst = 0, st = 0, retry;
|
||||
int n_match = 0, n_subst = 0, st = 0;
|
||||
TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
|
||||
TFreeList freelist;
|
||||
/*------------------------------------------------------------------*/
|
||||
@@ -276,22 +251,11 @@ static int algf_gsub (lua_State *L) {
|
||||
}
|
||||
/*------------------------------------------------------------------*/
|
||||
buffer_init (&BufOut, 1024, L, &freelist);
|
||||
SET_RETRY (retry, 0);
|
||||
while ((argE.maxmatch < 0 || n_match < argE.maxmatch) && st <= (int)argE.textlen) {
|
||||
int from, to, res;
|
||||
int curr_subst = 0;
|
||||
res = GSUB_EXEC (ud, &argE, st, retry);
|
||||
res = gsub_exec (ud, &argE, st);
|
||||
if (ALG_NOMATCH (res)) {
|
||||
#ifdef ALG_USERETRY
|
||||
if (retry) {
|
||||
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
|
||||
buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
|
||||
st += ALG_CHARSIZE;
|
||||
retry = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
else if (!ALG_ISMATCH (res)) {
|
||||
@@ -406,16 +370,11 @@ static int algf_gsub (lua_State *L) {
|
||||
n_subst += curr_subst;
|
||||
if (st < to) {
|
||||
st = to;
|
||||
SET_RETRY (retry, 0);
|
||||
}
|
||||
else if (st < (int)argE.textlen) {
|
||||
#ifdef ALG_USERETRY
|
||||
retry = 1;
|
||||
#else
|
||||
/* advance by 1 char (not replaced) */
|
||||
buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
|
||||
st += ALG_CHARSIZE;
|
||||
#endif
|
||||
}
|
||||
else break;
|
||||
}
|
||||
@@ -433,7 +392,7 @@ static int algf_count (lua_State *L) {
|
||||
TUserdata *ud;
|
||||
TArgComp argC;
|
||||
TArgExec argE;
|
||||
int n_match = 0, st = 0, retry;
|
||||
int n_match = 0, st = 0;
|
||||
/*------------------------------------------------------------------*/
|
||||
checkarg_count (L, &argC, &argE);
|
||||
if (argC.ud) {
|
||||
@@ -442,20 +401,10 @@ static int algf_count (lua_State *L) {
|
||||
}
|
||||
else compile_regex (L, &argC, &ud);
|
||||
/*------------------------------------------------------------------*/
|
||||
SET_RETRY (retry, 0);
|
||||
while (st <= (int)argE.textlen) {
|
||||
int to, res;
|
||||
res = GSUB_EXEC (ud, &argE, st, retry);
|
||||
res = gsub_exec (ud, &argE, st);
|
||||
if (ALG_NOMATCH (res)) {
|
||||
#ifdef ALG_USERETRY
|
||||
if (retry) {
|
||||
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
|
||||
st += ALG_CHARSIZE;
|
||||
retry = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
else if (!ALG_ISMATCH (res)) {
|
||||
@@ -473,15 +422,10 @@ static int algf_count (lua_State *L) {
|
||||
/*----------------------------------------------------------------*/
|
||||
if (st < to) {
|
||||
st = to;
|
||||
SET_RETRY (retry, 0);
|
||||
}
|
||||
else if (st < (int)argE.textlen) {
|
||||
#ifdef ALG_USERETRY
|
||||
retry = 1;
|
||||
#else
|
||||
/* advance by 1 char (not replaced) */
|
||||
st += ALG_CHARSIZE;
|
||||
#endif
|
||||
}
|
||||
else break;
|
||||
}
|
||||
@@ -543,39 +487,24 @@ static int algf_match (lua_State *L) {
|
||||
|
||||
|
||||
static int gmatch_iter (lua_State *L) {
|
||||
int retry;
|
||||
TArgExec argE;
|
||||
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
|
||||
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
|
||||
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
|
||||
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
|
||||
#ifdef ALG_USERETRY
|
||||
retry = lua_tointeger (L, lua_upvalueindex (5));
|
||||
#endif
|
||||
|
||||
if (argE.startoffset > (int)argE.textlen)
|
||||
return 0;
|
||||
|
||||
while (1) {
|
||||
int res = GMATCH_EXEC (ud, &argE, retry);
|
||||
int res = gmatch_exec (ud, &argE);
|
||||
if (ALG_ISMATCH (res)) {
|
||||
int incr = 0;
|
||||
if (ALG_SUBLEN(ud,0)) {
|
||||
SET_RETRY (retry, 0);
|
||||
}
|
||||
else { /* no progress: prevent endless loop */
|
||||
#ifdef ALG_USERETRY
|
||||
SET_RETRY (retry, 1);
|
||||
#else
|
||||
if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
|
||||
incr = ALG_CHARSIZE;
|
||||
#endif
|
||||
}
|
||||
lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
|
||||
lua_replace (L, lua_upvalueindex (4));
|
||||
#ifdef ALG_USERETRY
|
||||
lua_pushinteger (L, retry);
|
||||
lua_replace (L, lua_upvalueindex (5)); /* update retry */
|
||||
#endif
|
||||
/* push either captures or entire match */
|
||||
if (ALG_NSUB(ud)) {
|
||||
push_substrings (L, ud, argE.text, NULL);
|
||||
@@ -587,15 +516,6 @@ static int gmatch_iter (lua_State *L) {
|
||||
}
|
||||
}
|
||||
else if (ALG_NOMATCH (res)) {
|
||||
#ifdef ALG_USERETRY
|
||||
if (retry) {
|
||||
if (argE.startoffset < (int)argE.textlen) {
|
||||
++argE.startoffset; /* advance by 1 char */
|
||||
SET_RETRY (retry, 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
@@ -665,12 +585,7 @@ static int algf_gmatch (lua_State *L)
|
||||
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
|
||||
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
|
||||
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
|
||||
#ifdef ALG_USERETRY
|
||||
lua_pushinteger (L, 0); /* 5-th upvalue: retry */
|
||||
lua_pushcclosure (L, gmatch_iter, 5);
|
||||
#else
|
||||
lua_pushcclosure (L, gmatch_iter, 4);
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -55,7 +55,6 @@ static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
|
||||
|
||||
#define ALG_BASE(st) 0
|
||||
#define ALG_PULL
|
||||
/* #define ALG_USERETRY */
|
||||
|
||||
typedef struct {
|
||||
regex_t *reg;
|
||||
|
||||
+8
-25
@@ -57,7 +57,6 @@ static void checkarg_compile (lua_State *L, int pos, TArgComp *argC);
|
||||
|
||||
#define ALG_BASE(st) 0
|
||||
#define ALG_PULL
|
||||
#define ALG_USERETRY
|
||||
|
||||
typedef struct {
|
||||
pcre * pr;
|
||||
@@ -296,18 +295,10 @@ static int Lpcre_dfa_exec (lua_State *L)
|
||||
}
|
||||
#endif /* #if PCRE_MAJOR >= 6 */
|
||||
|
||||
#ifdef ALG_USERETRY
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE, int retry) {
|
||||
int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
argE->startoffset, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
#else
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
#endif
|
||||
static int gmatch_exec (TUserdata *ud, TArgExec *argE) {
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
|
||||
static void gmatch_pushsubject (lua_State *L, TArgExec *argE) {
|
||||
lua_pushlstring (L, argE->text, argE->textlen);
|
||||
@@ -318,18 +309,10 @@ static int findmatch_exec (TPcre *ud, TArgExec *argE) {
|
||||
argE->startoffset, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
|
||||
#ifdef ALG_USERETRY
|
||||
static int gsub_exec (TPcre *ud, TArgExec *argE, int st, int retry) {
|
||||
int eflags = retry ? (argE->eflags|PCRE_NOTEMPTY|PCRE_ANCHORED) : argE->eflags;
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
st, eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
#else
|
||||
static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
#endif
|
||||
static int gsub_exec (TPcre *ud, TArgExec *argE, int st) {
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen,
|
||||
st, argE->eflags, ud->match, (ALG_NSUB(ud) + 1) * 3);
|
||||
}
|
||||
|
||||
static int split_exec (TPcre *ud, TArgExec *argE, int offset) {
|
||||
return pcre_exec (ud->pr, ud->extra, argE->text, argE->textlen, offset,
|
||||
|
||||
+2
-2
@@ -78,9 +78,9 @@ local function set_f_gmatch (lib, flg)
|
||||
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
|
||||
{ {"", pCSV}, {{"",F}} },
|
||||
{ {"12", pCSV}, {{"12",F}} },
|
||||
{ {",", pCSV}, {{"", F},{F,""}} },
|
||||
{ {",", pCSV}, {{"", F}} },
|
||||
{ {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
|
||||
{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
|
||||
{ {",,12,45,,ab,", pCSV}, {{"",F},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
+3
-3
@@ -27,7 +27,7 @@ local function set_f_gsub4 (lib, flg)
|
||||
Func = get_gsub (lib),
|
||||
--{ s, p, f, n, res1, res2, res3 },
|
||||
{ {"/* */ */", "%/%*(.*)%*%/", "#" }, {"#", 1, 1} },
|
||||
{ {"a2c3", ".-", "#" }, {"#########", 9, 9} }, -- test .-
|
||||
{ {"a2c3", ".-", "#" }, {"#a#2#c#3#", 5, 5} }, -- test .-
|
||||
{ {"/**/", "%/%*(.-)%*%/", "#" }, {"#", 1, 1} },
|
||||
{ {"/* */ */", "%/%*(.-)%*%/", "#" }, {"# */", 1, 1} },
|
||||
{ {"a2c3", "%d", "#" }, {"a#c#", 2, 2} }, -- test %d
|
||||
@@ -37,9 +37,9 @@ local function set_f_gsub4 (lib, flg)
|
||||
{ {"abcd", "\\b", "%1"}, {"abcd", 2, 2} },
|
||||
{ {"", pCSV,fCSV}, {"[]", 1, 1} },
|
||||
{ {"123", pCSV,fCSV}, {"[123]", 1, 1} },
|
||||
{ {",", pCSV,fCSV}, {"[][]", 2, 2} },
|
||||
{ {",", pCSV,fCSV}, {"[],", 1, 1} },
|
||||
{ {"123,,456", pCSV,fCSV}, {"[123][][456]", 3, 3}},
|
||||
{ {",,123,456,,abc,789,", pCSV,fCSV}, {"[][][123][456][][abc][789][]", 8, 8}},
|
||||
{ {",,123,456,,abc,789,", pCSV,fCSV}, {"[],[123][456][][abc][789][]", 7, 7}},
|
||||
}
|
||||
-- convert patterns: lua -> pcre
|
||||
for _, test in ipairs (set) do
|
||||
|
||||
Reference in New Issue
Block a user