]> granicus.if.org Git - re2c/commitdiff
Fixed bug #60 "redundant use of YYMARKER".
authorUlya Trofimovich <skvadrik@gmail.com>
Sat, 6 Jun 2015 17:51:27 +0000 (18:51 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Sat, 6 Jun 2015 17:51:27 +0000 (18:51 +0100)
Bug description: sometimes re2c would generate code that backups
current input position (e.g. 'YYMARKER = YYCURSOR'), but wouldn't
generate code that restores backuped position (e.g. 'YYCURSOR =
YYMARKER').

Analyses: DFA may have overlapping rules (e.g. "a" and "aaa").
In such cases, if the shorter rule matched, lexer must attempt to
match the longer one. If the longer rule also mathed, then lexer
prefers it to the shorter rule. If the longer rule didn't match,
lexer must backtrack input position to the point when the shorter
rule matched. In order to be able to backtrack, re2c must generate
backup code (e.g. 'YYMARKER = YYCURSOR') and restore code (e.g.
'YYCURSOR = YYMARKER').
In some rare cases DFA has overlapping rules, but if the shorter rule
matched, then the longer rule will always match (perhaps on an
arbitrary long input string), e.g.:
    /*!re2c
        [^]+ "a" { 1st }
        "b"      { 2nd }
    */
In this cases there's no need to generate backup code for 2nd rule:
lexer will either encounter final "a" and the 1st rule will match
or YYFILL will not return; anyway, restore code will never be run.
re2c used to output backup code but not restore code in such cases.
This is the bug: backup code is useless without restore code and
should be omitted.

In future re2c should warn about such cases (when the shorter of
two overlapping rules is shadowed by the longer one).

The fix: postpone insertion of save actions (those with backup code)
untill it is known if restore code will be generated.
I also removed obsolete global variable 'bUsedYYMarker', which was
always set to 'true' (it should be per-DFA, not per-block configuration
anyway).

re2c/src/codegen/emit_action.cc
re2c/src/codegen/prepare_dfa.cc
re2c/src/globals.h
re2c/src/main.cc
re2c/test/bug60_redundant_yymarker.ci.c [new file with mode: 0644]
re2c/test/bug60_redundant_yymarker.ci.re [new file with mode: 0644]
re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c

index fb393d656a23a35118c36cf5fb99c5c14e5b3751..66f0a3a231083b7b09e21b98cc109c60fb9c82b7 100644 (file)
@@ -113,11 +113,11 @@ void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * co
 
        if (s->link)
        {
-               need(o, ind, readCh, s->depth, initial.setMarker && bUsedYYMarker);
+               need(o, ind, readCh, s->depth, initial.setMarker);
        }
        else
        {
-               if (initial.setMarker && bUsedYYMarker)
+               if (initial.setMarker)
                {
                        o << input_api.stmt_backup (ind);
                }
@@ -139,22 +139,12 @@ void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const
 
        if (s->link)
        {
-               if (bUsedYYMarker)
-               {
-                       o << input_api.stmt_skip_backup (ind);
-               }
+               o << input_api.stmt_skip_backup (ind);
                need(o, ind, readCh, s->depth, false);
        }
        else
        {
-               if (bUsedYYMarker)
-               {
-                       o << input_api.stmt_skip_backup_peek (ind);
-               }
-               else
-               {
-                       o << input_api.stmt_skip_peek (ind);
-               }
+               o << input_api.stmt_skip_backup_peek (ind);
                readCh = false;
        }
 }
@@ -180,7 +170,6 @@ void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * con
 {
        if (accept.size() > 0)
        {
-               bUsedYYMarker = true;
                if (!DFlag)
                {
                        o << input_api.stmt_restore (ind);
index 49456c5c5c3285d54f3e4b53a338a358838bbfd5..55abde3843f2725d63b5fc3d77086e35765f4caa 100644 (file)
@@ -180,8 +180,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill)
        memset(saves, ~0, (nRules)*sizeof(*saves));
 
        // mark backtracking points
-       bSaveOnHead = false;
-
+       std::vector<State *> backup_states;
        for (State * s = head; s; s = s->next)
        {
                if (s->rule)
@@ -194,9 +193,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill)
                                        {
                                                saves[s->rule->accept] = nSaves++;
                                        }
-
-                                       bSaveOnHead |= s == head;
-                                       s->action.set_save (saves[s->rule->accept]);
+                                       backup_states.push_back (s);
                                }
                        }
                }
@@ -247,6 +244,14 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill)
        
        if (accfixup)
        {
+               // insert backup actions
+               for (std::vector<State *>::iterator i = backup_states.begin (); i != backup_states.end (); ++i)
+               {
+                       (*i)->action.set_save (saves[(*i)->rule->accept]);
+               }
+               // backup action for initial state must be saved explicitly
+               // because it will be overwritten by initial action
+               bSaveOnHead = !backup_states.empty () && backup_states.front () == head;
                for (uint32_t i = 0; i < nRules; ++i)
                {
                        if (saves[i] != ~0u)
index 77053224bead1fe0b905e1a0be64a16be9ff945d..771a8a5c7ef7b2aa8bba98feabce2bbabe74d5b9 100644 (file)
@@ -27,7 +27,6 @@ extern bool tFlag;
 extern bool flag_skeleton;
 
 extern bool bNoGenerationDate;
-extern bool bUsedYYMarker;
 extern bool bUsedYYBitmap;
 
 extern std::string labelPrefix;
index d14975bf014b4a606d77a69a5d413dab11718315..7d005971e642da043319a09502cb9674c5f5b0f1 100644 (file)
@@ -28,7 +28,6 @@ bool flag_skeleton = false;
 
 bool bNoGenerationDate = false;
 bool bUsedYYBitmap  = false;
-bool bUsedYYMarker  = true;
 bool bEmitYYCh       = true;
 bool bUseStateNext   = false;
 bool bUseYYFill      = true;
diff --git a/re2c/test/bug60_redundant_yymarker.ci.c b/re2c/test/bug60_redundant_yymarker.ci.c
new file mode 100644 (file)
index 0000000..65cdb0e
--- /dev/null
@@ -0,0 +1,69 @@
+/* Generated by re2c */
+
+{
+       YYCTYPE yych;
+       switch (YYGETCONDITION()) {
+       case yycc1: goto yyc_c1;
+       case yycc2: goto yyc_c2;
+       }
+/* *********************************** */
+yyc_c1:
+       if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'b':       goto yy5;
+       default:        goto yy3;
+       }
+yy3:
+       ++YYCURSOR;
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+yy4:
+       switch (yych) {
+       case 'a':       goto yy7;
+       default:        goto yy3;
+       }
+yy5:
+       ++YYCURSOR;
+       yych = *YYCURSOR;
+       goto yy4;
+       {}
+yy7:
+       ++YYCURSOR;
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy7;
+       default:        goto yy3;
+       }
+       {}
+/* *********************************** */
+yyc_c2:
+       if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'd':       goto yy13;
+       default:        goto yy12;
+       }
+yy12:
+       YYCURSOR = YYMARKER;
+       goto yy14;
+yy13:
+       yych = *(YYMARKER = ++YYCURSOR);
+       switch (yych) {
+       case 'd':       goto yy15;
+       default:        goto yy14;
+       }
+yy14:
+       {}
+yy15:
+       yych = *++YYCURSOR;
+       switch (yych) {
+       case 'd':       goto yy16;
+       default:        goto yy12;
+       }
+yy16:
+       ++YYCURSOR;
+       {}
+}
+
diff --git a/re2c/test/bug60_redundant_yymarker.ci.re b/re2c/test/bug60_redundant_yymarker.ci.re
new file mode 100644 (file)
index 0000000..99e590e
--- /dev/null
@@ -0,0 +1,9 @@
+/*!re2c
+
+<c1> [^]+ "a" {}
+<c1> "b" {}
+
+<c2> "d" {}
+<c2> "ddd" {}
+
+*/
index 78eca42b2e8e7a7a1a5dc105d0a1c428ff9f8f79..7fdfb961b70fb016aac200e1b8fc6eec6082dadc 100644 (file)
@@ -490,7 +490,8 @@ yy4:
 }
 yy5:
                YYDEBUG(5, *YYCURSOR);
-               yych = *(YYMARKER = ++YYCURSOR);
+               ++YYCURSOR;
+               yych = *YYCURSOR;
                goto yy64;
 yy6:
                YYDEBUG(6, *YYCURSOR);
@@ -516,7 +517,7 @@ yy9:
                goto yy8;
 yy10:
                YYDEBUG(10, *YYCURSOR);
-               yych = *(YYMARKER = ++YYCURSOR);
+               yych = *++YYCURSOR;
                {
                        static void *yytarget[256] = {
                                &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26,
@@ -568,7 +569,8 @@ yy13:
                goto yy26;
 yy14:
                YYDEBUG(14, *YYCURSOR);
-               yych = *(YYMARKER = ++YYCURSOR);
+               ++YYCURSOR;
+               yych = *YYCURSOR;
                goto yy59;
                YYDEBUG(15, *YYCURSOR);
                yyleng = YYCURSOR - SCNG(yy_text);
@@ -1077,7 +1079,7 @@ yy62:
                goto yy61;
 yy63:
                YYDEBUG(63, *YYCURSOR);
-               YYMARKER = ++YYCURSOR;
+               ++YYCURSOR;
                YYFILL(2);
                yych = *YYCURSOR;
 yy64:
@@ -1121,7 +1123,7 @@ yy64:
                }
 yy65:
                YYDEBUG(65, *YYCURSOR);
-               YYMARKER = ++YYCURSOR;
+               ++YYCURSOR;
                YYFILL(2);
                yych = *YYCURSOR;
                YYDEBUG(66, *YYCURSOR);
@@ -2073,7 +2075,7 @@ end_raw_value_chars:
 }
 yy137:
                YYDEBUG(137, *YYCURSOR);
-               yych = *(YYMARKER = ++YYCURSOR);
+               yych = *++YYCURSOR;
                {
                        static void *yytarget[256] = {
                                &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136,
@@ -2129,7 +2131,7 @@ yy140:
                goto yy139;
 yy141:
                YYDEBUG(141, *YYCURSOR);
-               yych = *(YYMARKER = ++YYCURSOR);
+               yych = *++YYCURSOR;
                goto yy143;
 yy142:
                YYDEBUG(142, *YYCURSOR);
@@ -2164,7 +2166,7 @@ yy147:
                goto yy139;
 yy148:
                YYDEBUG(148, *YYCURSOR);
-               YYMARKER = ++YYCURSOR;
+               ++YYCURSOR;
                YYFILL(2);
                yych = *YYCURSOR;
 yy149: