From 2cbf596c7f6d4ef21798bb06efd7542781124300 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Wed, 31 Mar 2004 15:50:43 +0000 Subject: [PATCH] patched a bug in parsing production 1 and 2 of xmlschemas regexp that * xmlregexp.c: patched a bug in parsing production 1 and 2 of xmlschemas regexp that William pointed out while working on #134120 * test/regexp/branch result/regexp/branch: added a specific regression test Daniel --- ChangeLog | 8 +++++ result/regexp/branch | 14 +++++++++ test/regexp/branch | 14 +++++++++ xmlregexp.c | 71 +++++++++++++++++++++++--------------------- 4 files changed, 73 insertions(+), 34 deletions(-) create mode 100644 result/regexp/branch create mode 100644 test/regexp/branch diff --git a/ChangeLog b/ChangeLog index cb3505a9..13615a8f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Wed Mar 31 17:47:28 CEST 2004 Daniel Veillard + + * xmlregexp.c: patched a bug in parsing production 1 and 2 of + xmlschemas regexp that William pointed out while working on + #134120 + * test/regexp/branch result/regexp/branch: added a specific + regression test + Wed Mar 31 09:50:32 HKT 2004 William Brack * Makefile.am: added PYTHONPATH to python tests for Schemas diff --git a/result/regexp/branch b/result/regexp/branch new file mode 100644 index 00000000..dd703ac2 --- /dev/null +++ b/result/regexp/branch @@ -0,0 +1,14 @@ +Regexp: a|b(d|e(g|h|i)|f)|c +a: Ok +c: Ok +bd: Ok +bf: Ok +beg: Ok +beh: Ok +bei: Ok +b: Fail +be: Fail +bi: Fail +f: Fail +ab: Fail +ac: Fail diff --git a/test/regexp/branch b/test/regexp/branch new file mode 100644 index 00000000..6a728e67 --- /dev/null +++ b/test/regexp/branch @@ -0,0 +1,14 @@ +=>a|b(d|e(g|h|i)|f)|c +a +c +bd +bf +beg +beh +bei +b +be +bi +f +ab +ac diff --git a/xmlregexp.c b/xmlregexp.c index 057458b2..d1e6f38d 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -1214,14 +1214,15 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, #ifdef DEBUG_REGEXP_GRAPH printf("Add trans from %d to %d ", state->no, target->no); if (count == REGEXP_ALL_COUNTER) - printf("all transition"); + printf("all transition\n"); else if (count >= 0) - printf("count based %d", count); + printf("count based %d\n", count); else if (counter >= 0) - printf("counted %d", counter); + printf("counted %d\n", counter); else if (atom == NULL) - printf("epsilon transition"); - printf("\n"); + printf("epsilon transition\n"); + else if (atom != NULL) + xmlRegPrintAtom(stdout, atom); #endif state->trans[state->nbTrans].atom = atom; @@ -3862,50 +3863,33 @@ xmlFAParsePiece(xmlRegParserCtxtPtr ctxt) { /** * xmlFAParseBranch: * @ctxt: a regexp parser context - * @first: is taht the first * * [2] branch ::= piece* 8 */ static int -xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) { +xmlFAParseBranch(xmlRegParserCtxtPtr ctxt) { xmlRegStatePtr previous; - xmlRegAtomPtr prevatom = NULL; int ret; previous = ctxt->state; ret = xmlFAParsePiece(ctxt); if (ret != 0) { - if (first) { - if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0) - return(-1); - previous = ctxt->state; - } else { - prevatom = ctxt->atom; - } + if (xmlFAGenerateTransitions(ctxt, previous, NULL, ctxt->atom) < 0) + return(-1); + previous = ctxt->state; ctxt->atom = NULL; } while ((ret != 0) && (ctxt->error == 0)) { ret = xmlFAParsePiece(ctxt); if (ret != 0) { - if (first) { - if (xmlFAGenerateTransitions(ctxt, previous, NULL, - ctxt->atom) < 0) + if (xmlFAGenerateTransitions(ctxt, previous, NULL, + ctxt->atom) < 0) return(-1); - } else { - if (xmlFAGenerateTransitions(ctxt, previous, NULL, - prevatom) < 0) - return(-1); - prevatom = ctxt->atom; - } previous = ctxt->state; ctxt->atom = NULL; } } - if (!first) { - if (xmlFAGenerateTransitions(ctxt, previous, ctxt->end, prevatom) < 0) - return(-1); - } return(0); } @@ -3918,12 +3902,21 @@ xmlFAParseBranch(xmlRegParserCtxtPtr ctxt, int first) { */ static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { - xmlRegStatePtr start, end, oldend; + xmlRegStatePtr start, end, oldend, oldstart; oldend = ctxt->end; + oldstart = ctxt->state; + /* if not top start should have been generated by an epsilon trans */ start = ctxt->state; - xmlFAParseBranch(ctxt, (ctxt->end == NULL)); + ctxt->end = NULL; + xmlFAParseBranch(ctxt); + if (top) { +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final\n", ctxt->state->no); +#endif + ctxt->state->type = XML_REGEXP_FINAL_STATE; + } if (CUR != '|') { ctxt->end = ctxt->state; return; @@ -3932,11 +3925,21 @@ xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top) { while ((CUR == '|') && (ctxt->error == 0)) { NEXT; ctxt->state = start; - ctxt->end = end; - xmlFAParseBranch(ctxt, 0); + ctxt->end = NULL; + xmlFAParseBranch(ctxt); + if (top) { + ctxt->state->type = XML_REGEXP_FINAL_STATE; +#ifdef DEBUG_REGEXP_GRAPH + printf("State %d is final\n", ctxt->state->no); +#endif + } else { + xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, end); + } + } + if (!top) { + ctxt->state = end; + ctxt->end = end; } - if (!top) - ctxt->end = oldend; } /************************************************************************