In the functions searching for multiple matches every empty match adjacent to the previous match is discarded.

This commit is contained in:
Shmuel Zeigerman
2015-02-26 19:52:07 +02:00
parent a5c2a17019
commit 0684b19e44
6 changed files with 140 additions and 92 deletions
+4
View File
@@ -88,6 +88,10 @@ Notes
9. The notation *larg...* is used to indicate optional library-specific
arguments, which are documented in the ``new`` method of each library.
10. In the functions searching for multiple matches (``gmatch``, ``gsub``,
``split``, ``count``) every empty match adjacent to the previous match
is discarded, e.g. ``rex.count("abc",".*")`` will return 1, because the empty match at the end of the subject immediately follows the match ``"abc"``.
------------------------------------------------------------
Functions and methods common to all bindings
+83 -53
View File
@@ -228,7 +228,7 @@ static int algf_gsub (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
int n_match = 0, n_subst = 0, st = 0;
int n_match = 0, n_subst = 0, st = 0, last_to = -1;
TBuffer BufOut, BufRep, BufTemp, *pBuf = &BufOut;
TFreeList freelist;
/*------------------------------------------------------------------*/
@@ -262,9 +262,18 @@ static int algf_gsub (lua_State *L) {
freelist_free (&freelist);
return generate_error (L, ud, res);
}
++n_match;
from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
to = ALG_BASE(st) + ALG_SUBEND(ud,0);
if (to == last_to) { /* discard an empty match adjacent to the previous match */
if (st < (int)argE.textlen) { /* advance by 1 char (not replaced) */
buffer_addlstring (&BufOut, argE.text + st, ALG_CHARSIZE);
st += ALG_CHARSIZE;
continue;
}
break;
}
last_to = to;
++n_match;
if (st < from) {
buffer_addlstring (&BufOut, argE.text + st, from - st);
#ifdef ALG_PULL
@@ -392,7 +401,7 @@ static int algf_count (lua_State *L) {
TUserdata *ud;
TArgComp argC;
TArgExec argE;
int n_match = 0, st = 0;
int n_match = 0, st = 0, last_to = -1;
/*------------------------------------------------------------------*/
checkarg_count (L, &argC, &argE);
if (argC.ud) {
@@ -410,8 +419,16 @@ static int algf_count (lua_State *L) {
else if (!ALG_ISMATCH (res)) {
return generate_error (L, ud, res);
}
++n_match;
to = ALG_BASE(st) + ALG_SUBEND(ud,0);
if (to == last_to) { /* discard an empty match adjacent to the previous match */
if (st < (int)argE.textlen) { /* advance by 1 char */
st += ALG_CHARSIZE;
continue;
}
break;
}
last_to = to;
++n_match;
#ifdef ALG_PULL
{
int from = ALG_BASE(st) + ALG_SUBBEG(ud,0);
@@ -487,24 +504,32 @@ static int algf_match (lua_State *L) {
static int gmatch_iter (lua_State *L) {
int last_end, res;
TArgExec argE;
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
if (argE.startoffset > (int)argE.textlen)
return 0;
last_end = lua_tointeger (L, lua_upvalueindex (5));
while (1) {
int res = gmatch_exec (ud, &argE);
if (argE.startoffset > (int)argE.textlen)
return 0;
res = gmatch_exec (ud, &argE);
if (ALG_ISMATCH (res)) {
int incr = 0;
if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
argE.startoffset += ALG_CHARSIZE;
continue;
}
incr = ALG_CHARSIZE;
}
lua_pushinteger(L, ALG_BASE(argE.startoffset) + incr + ALG_SUBEND(ud,0)); /* update start offset */
last_end = ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0);
lua_pushinteger(L, last_end + incr); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
lua_pushinteger(L, last_end); /* update last end of match */
lua_replace (L, lua_upvalueindex (5));
/* push either captures or entire match */
if (ALG_NSUB(ud)) {
push_substrings (L, ud, argE.text, NULL);
@@ -515,9 +540,8 @@ static int gmatch_iter (lua_State *L) {
return 1;
}
}
else if (ALG_NOMATCH (res)) {
else if (ALG_NOMATCH (res))
return 0;
}
else
return generate_error (L, ud, res);
}
@@ -525,47 +549,55 @@ static int gmatch_iter (lua_State *L) {
static int split_iter (lua_State *L) {
int incr, newoffset, res;
int incr, last_end, newoffset, res;
TArgExec argE;
TUserdata *ud = (TUserdata*) lua_touserdata (L, lua_upvalueindex (1));
argE.text = lua_tolstring (L, lua_upvalueindex (2), &argE.textlen);
argE.eflags = lua_tointeger (L, lua_upvalueindex (3));
argE.startoffset = lua_tointeger (L, lua_upvalueindex (4));
incr = lua_tointeger (L, lua_upvalueindex (5));
last_end = lua_tointeger (L, lua_upvalueindex (6));
if (argE.startoffset > (int)argE.textlen)
if (incr < 0)
return 0;
if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
goto nomatch;
res = split_exec (ud, &argE, newoffset);
if (ALG_ISMATCH (res)) {
lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset */
lua_replace (L, lua_upvalueindex (4));
lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
lua_replace (L, lua_upvalueindex (5));
/* push text preceding the match */
lua_pushlstring (L, argE.text + argE.startoffset,
ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
/* push either captures or entire match */
if (ALG_NSUB(ud)) {
push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
return 1 + ALG_NSUB(ud);
}
else {
ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
return 2;
while (1) {
if ((newoffset = argE.startoffset + incr) > (int)argE.textlen)
break;
res = split_exec (ud, &argE, newoffset);
if (ALG_ISMATCH (res)) {
if (!ALG_SUBLEN(ud,0)) { /* no progress: prevent endless loop */
if (last_end == ALG_BASE(argE.startoffset) + ALG_SUBEND(ud,0)) {
incr += ALG_CHARSIZE;
continue;
}
}
lua_pushinteger(L, ALG_BASE(newoffset) + ALG_SUBEND(ud,0)); /* update start offset and last_end */
lua_pushvalue (L, -1);
lua_replace (L, lua_upvalueindex (4));
lua_replace (L, lua_upvalueindex (6));
lua_pushinteger (L, ALG_SUBLEN(ud,0) ? 0 : ALG_CHARSIZE); /* update incr */
lua_replace (L, lua_upvalueindex (5));
/* push text preceding the match */
lua_pushlstring (L, argE.text + argE.startoffset,
ALG_SUBBEG(ud,0) + ALG_BASE(newoffset) - argE.startoffset);
/* push either captures or entire match */
if (ALG_NSUB(ud)) {
push_substrings (L, ud, argE.text + ALG_BASE(newoffset), NULL);
return 1 + ALG_NSUB(ud);
}
else {
ALG_PUSHSUB (L, ud, argE.text + ALG_BASE(newoffset), 0);
return 2;
}
}
else if (ALG_NOMATCH (res))
break;
else
return generate_error (L, ud, res);
}
else if (ALG_NOMATCH (res))
goto nomatch;
else
return generate_error (L, ud, res);
nomatch:
lua_pushinteger (L, argE.textlen + 1); /* mark as last iteration */
lua_replace (L, lua_upvalueindex (4)); /* update start offset */
lua_pushinteger (L, -1); /* mark as last iteration */
lua_replace (L, lua_upvalueindex (5)); /* incr = -1 */
lua_pushlstring (L, argE.text+argE.startoffset, argE.textlen-argE.startoffset);
return 1;
}
@@ -575,17 +607,16 @@ static int algf_gmatch (lua_State *L)
{
TArgComp argC;
TArgExec argE;
TUserdata *ud;
checkarg_gmatch_split (L, &argC, &argE);
if (argC.ud) {
ud = (TUserdata*) argC.ud;
if (argC.ud)
lua_pushvalue (L, 2);
}
else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
else
compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
lua_pushcclosure (L, gmatch_iter, 4);
lua_pushinteger (L, -1); /* 5-th upvalue: last end of match */
lua_pushcclosure (L, gmatch_iter, 5);
return 1;
}
@@ -593,18 +624,17 @@ static int algf_split (lua_State *L)
{
TArgComp argC;
TArgExec argE;
TUserdata *ud;
checkarg_gmatch_split (L, &argC, &argE);
if (argC.ud) {
ud = (TUserdata*) argC.ud;
if (argC.ud)
lua_pushvalue (L, 2);
}
else compile_regex (L, &argC, &ud); /* 1-st upvalue: ud */
else
compile_regex (L, &argC, NULL); /* 1-st upvalue: ud */
gmatch_pushsubject (L, &argE); /* 2-nd upvalue: s */
lua_pushinteger (L, argE.eflags); /* 3-rd upvalue: ef */
lua_pushinteger (L, 0); /* 4-th upvalue: startoffset */
lua_pushinteger (L, 0); /* 5-th upvalue: incr */
lua_pushcclosure (L, split_iter, 5);
lua_pushinteger (L, -1); /* 6-th upvalue: last_end */
lua_pushcclosure (L, split_iter, 6);
return 1;
}
+3 -3
View File
@@ -33,7 +33,7 @@ local function set_f_gmatch (lib, flg)
--{ subj patt results }
{ {"ab", lib.new"."}, {{"a",N}, {"b",N} } },
{ {("abcd"):rep(3), "(.)b.(d)"}, {{"a","d"},{"a","d"},{"a","d"}} },
{ {"abcd", ".*" }, {{"abcd",N},{"",N} } },--zero-length match
{ {"abcd", ".*" }, {{"abcd",N} } },--zero-length match
{ {"abc", "^." }, {{"a",N}} },--anchored pattern
}
end
@@ -45,7 +45,7 @@ local function set_f_count (lib, flg)
--{ subj patt results }
{ {"ab", lib.new"."}, { 2 } },
{ {("abcd"):rep(3), "(.)b.(d)"}, { 3 } },
{ {"abcd", ".*" }, { 2 } },
{ {"abcd", ".*" }, { 1 } },
{ {"abc", "^." }, { 1 } },
}
end
@@ -229,7 +229,7 @@ local function set_f_gsub4 (lib, flg)
--{ s, p, f, n, res1, res2, res3 },
{ {"a2c3", ".", "#" }, {"####", 4, 4} }, -- test .
{ {"a2c3", ".+", "#" }, {"#", 1, 1} }, -- test .+
{ {"a2c3", ".*", "#" }, {"##", 2, 2} }, -- test .*
{ {"a2c3", ".*", "#" }, {"#", 1, 1} }, -- test .*
{ {"/* */ */", "\\/\\*(.*)\\*\\/", "#" }, {"#", 1, 1} },
{ {"a2c3", "[0-9]", "#" }, {"a#c#", 2, 2} }, -- test %d
{ {"a2c3", "[^0-9]", "#" }, {"#2#3", 2, 2} }, -- test %D
+17 -12
View File
@@ -58,7 +58,7 @@ end
local function set_f_gmatch (lib, flg)
-- gmatch (s, p, [cf], [ef])
local pCSV = "(^[^,]*)|,([^,]*)"
local pCSV = "[^,]*"
local F = false
local function test_gmatch (subj, patt)
local out, guard = {}, 10
@@ -72,13 +72,15 @@ local function set_f_gmatch (lib, flg)
return {
Name = "Function gmatch",
Func = test_gmatch,
--{ subj patt results }
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
{ {"", pCSV}, {{"",F}} },
{ {"12", pCSV}, {{"12",F}} },
----{ {",", pCSV}, {{"", F},{F,""}} },
{ {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
----{ {",,12,45,,ab,", pCSV}, {{"",F},{F,""},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
--{ subj patt results }
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
{ {"", pCSV}, {{"",N}} },
{ {"12", pCSV}, {{"12",N}} },
{ {",", pCSV}, {{"", N},{"", N}} },
{ {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
{ {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
{ {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
{ {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
}
end
@@ -98,10 +100,13 @@ local function set_f_split (lib, flg)
Func = test_split,
--{ subj patt results }
{ {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
{ {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
{ {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
{ {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
{ {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
{ {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
{ {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
{ {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
{ {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
{ {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
{ {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
{ {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
}
end
+17 -12
View File
@@ -61,7 +61,7 @@ end
local function set_f_gmatch (lib, flg)
-- gmatch (s, p, [cf], [ef])
local pCSV = "(^[^,]*)|,([^,]*)"
local pCSV = "[^,]*"
local F = false
local function test_gmatch (subj, patt)
local out, guard = {}, 10
@@ -75,13 +75,15 @@ local function set_f_gmatch (lib, flg)
return {
Name = "Function gmatch",
Func = test_gmatch,
--{ subj patt results }
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
{ {"", pCSV}, {{"",F}} },
{ {"12", pCSV}, {{"12",F}} },
{ {",", pCSV}, {{"", F}} },
{ {"12,,45", pCSV}, {{"12",F},{F,""},{F,"45"}} },
{ {",,12,45,,ab,", pCSV}, {{"",F},{F,"12"},{F,"45"},{F,""},{F,"ab"},{F,""}} },
--{ subj patt results }
{ {"a\0c", "." }, {{"a",N},{"\0",N},{"c",N}} },--nuls in subj
{ {"", pCSV}, {{"",N}} },
{ {"12", pCSV}, {{"12",N}} },
{ {",", pCSV}, {{"", N},{"", N}} },
{ {"12,,45", pCSV}, {{"12",N},{"",N},{"45",N}} },
{ {",,12,45,,ab,", pCSV}, {{"",N},{"",N},{"12",N},{"45",N},{"",N},{"ab",N},{"",N}} },
{ {"12345", "(.)(.)"}, {{"1","2"},{"3","4"}} },
{ {"12345", "(.)(.?)"}, {{"1","2"},{"3","4"},{"5",""}} },
}
end
@@ -101,10 +103,13 @@ local function set_f_split (lib, flg)
Func = test_split,
--{ subj patt results }
{ {"a,\0,c", ","}, {{"a",",",N},{"\0",",",N},{"c",N,N}, } },--nuls in subj
{ {"ab", "$"}, {{"ab","",N}, {"",N,N}, } },
{ {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
{ {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N},{"",N,N}, } },
{ {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N}, } },
{ {"ab", "$"}, {{"ab","",N}, {"",N,N} } },
{ {"ab", "^|$"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
{ {"ab45ab","(?<=ab).*?"}, {{"ab","",N}, {"45ab","",N}, {"",N,N} } },
{ {"ab", "\\b"}, {{"", "", N}, {"ab","",N}, {"",N,N} } },
{ {"ab", ".*" }, {{"","ab",N}, {"",N,N} } },
{ {"ab", ".*?" }, {{"","",N}, {"a","",N}, {"b","",N}, {"",N,N} } },
{ {"ab;de", ";*" }, {{"","",N},{"a","",N},{"b",";",N},{"d","",N},{"e","",N},{"",N,N} }},
}
end
+16 -12
View File
@@ -6,27 +6,24 @@ VERSION = 2.7.2
# Target Lua version (51 for Lua 5.1; 52 for Lua 5.2).
LUAVERSION = 51
LUADOTVERSION = $(subst 5,5.,$(LUAVERSION))
# INSTALLPATH : Path to install the built DLL.
# LUADLL : Name of Lua DLL to link to (.dll should be omitted).
# LUAEXE : Name of Lua interpreter.
# LUAINC : Path of Lua include files.
# LIBPATH : Path of lua5.1.dll, lua52.dll, pcre.dll, etc.
# LIBPATH : Path of lua51.dll, lua52.dll, pcre.dll, etc.
LIBPATH = c:\exe32
INSTALLPATH = s:\exe\lib32\lua\$(LUADOTVERSION)
LUADLL = lua$(LUAVERSION)
LUAINC = s:\progr\work\system\include\lua\$(LUADOTVERSION)
LIBPATH = c:\exe32
ifeq ($(LUAVERSION),51)
INSTALLPATH = s:\exe\lib32\lua\5.1
LUADLL = lua5.1
LUAEXE = lua.exe
LUAINC = s:\progr\work\system\include\lua\5.1
MYCFLAGS += -DREX_CREATEGLOBALVAR
CREATEGLOBAL = -DREX_CREATEGLOBALVAR
else
INSTALLPATH = s:\exe\lib32\lua\5.2
LUADLL = lua52
LUAEXE = lua52.exe
LUAINC = s:\progr\work\system\include\lua\5.2
# MYCFLAGS += -DREX_CREATEGLOBALVAR
LUAEXE = lua$(LUAVERSION).exe
endif
# --------------------------------------------------------------------------
@@ -34,8 +31,11 @@ endif
BIN = $(PROJECT).dll
BININSTALL = $(INSTALLPATH)\$(BIN)
CC = mingw32-gcc
AR = ar rcu
RANLIB = ranlib
CFLAGS = -W -Wall -O2 $(INCS) -DREX_OPENLIB=luaopen_$(PROJECT) \
-DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" $(MYCFLAGS)
-DREX_LIBNAME=\"$(PROJECT)\" -DVERSION=\"$(VERSION)\" \
$(CREATEGLOBAL) $(MYCFLAGS)
DEFFILE = $(PROJECT).def
EXPORTED = luaopen_$(PROJECT)
INCS = -I$(LUAINC) $(MYINCS)
@@ -61,6 +61,10 @@ test:
$(BIN): $(OBJ) $(DEFFILE)
$(CC) $(DEFFILE) $(OBJ) -L$(LIBPATH) $(LIBS) -o $@ -shared
lib$(PROJECT)$(LUAVERSION).a: $(OBJ)
$(AR) $@ $?
$(RANLIB) $@
$(DEFFILE):
echo EXPORTS > $@
for %%d in ($(EXPORTED)) do echo %%d>> $@