From 3e01956eaf3c532b5ceefd3f37ed8fa8b27fa0ed Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 6 Jun 2015 18:51:27 +0100 Subject: [PATCH] Fixed bug #60 "redundant use of YYMARKER". Bug description: sometimes re2c would generate code that backups current input position (e.g. 'YYMARKER = YYCURSOR'), but wouldn't generate code that restores backuped position (e.g. 'YYCURSOR = YYMARKER'). Analyses: DFA may have overlapping rules (e.g. "a" and "aaa"). In such cases, if the shorter rule matched, lexer must attempt to match the longer one. If the longer rule also mathed, then lexer prefers it to the shorter rule. If the longer rule didn't match, lexer must backtrack input position to the point when the shorter rule matched. In order to be able to backtrack, re2c must generate backup code (e.g. 'YYMARKER = YYCURSOR') and restore code (e.g. 'YYCURSOR = YYMARKER'). In some rare cases DFA has overlapping rules, but if the shorter rule matched, then the longer rule will always match (perhaps on an arbitrary long input string), e.g.: /*!re2c [^]+ "a" { 1st } "b" { 2nd } */ In this cases there's no need to generate backup code for 2nd rule: lexer will either encounter final "a" and the 1st rule will match or YYFILL will not return; anyway, restore code will never be run. re2c used to output backup code but not restore code in such cases. This is the bug: backup code is useless without restore code and should be omitted. In future re2c should warn about such cases (when the shorter of two overlapping rules is shadowed by the longer one). The fix: postpone insertion of save actions (those with backup code) untill it is known if restore code will be generated. I also removed obsolete global variable 'bUsedYYMarker', which was always set to 'true' (it should be per-DFA, not per-block configuration anyway). --- re2c/src/codegen/emit_action.cc | 19 ++--- re2c/src/codegen/prepare_dfa.cc | 15 ++-- re2c/src/globals.h | 1 - re2c/src/main.cc | 1 - re2c/test/bug60_redundant_yymarker.ci.c | 69 +++++++++++++++++++ re2c/test/bug60_redundant_yymarker.ci.re | 9 +++ ...11_zend_ini_scanner.igcFd--case-inverted.c | 18 ++--- 7 files changed, 102 insertions(+), 30 deletions(-) create mode 100644 re2c/test/bug60_redundant_yymarker.ci.c create mode 100644 re2c/test/bug60_redundant_yymarker.ci.re diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index fb393d65..66f0a3a2 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -113,11 +113,11 @@ void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * co if (s->link) { - need(o, ind, readCh, s->depth, initial.setMarker && bUsedYYMarker); + need(o, ind, readCh, s->depth, initial.setMarker); } else { - if (initial.setMarker && bUsedYYMarker) + if (initial.setMarker) { o << input_api.stmt_backup (ind); } @@ -139,22 +139,12 @@ void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const if (s->link) { - if (bUsedYYMarker) - { - o << input_api.stmt_skip_backup (ind); - } + o << input_api.stmt_skip_backup (ind); need(o, ind, readCh, s->depth, false); } else { - if (bUsedYYMarker) - { - o << input_api.stmt_skip_backup_peek (ind); - } - else - { - o << input_api.stmt_skip_peek (ind); - } + o << input_api.stmt_skip_backup_peek (ind); readCh = false; } } @@ -180,7 +170,6 @@ void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * con { if (accept.size() > 0) { - bUsedYYMarker = true; if (!DFlag) { o << input_api.stmt_restore (ind); diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index 49456c5c..55abde38 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -180,8 +180,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) memset(saves, ~0, (nRules)*sizeof(*saves)); // mark backtracking points - bSaveOnHead = false; - + std::vector backup_states; for (State * s = head; s; s = s->next) { if (s->rule) @@ -194,9 +193,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) { saves[s->rule->accept] = nSaves++; } - - bSaveOnHead |= s == head; - s->action.set_save (saves[s->rule->accept]); + backup_states.push_back (s); } } } @@ -247,6 +244,14 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) if (accfixup) { + // insert backup actions + for (std::vector::iterator i = backup_states.begin (); i != backup_states.end (); ++i) + { + (*i)->action.set_save (saves[(*i)->rule->accept]); + } + // backup action for initial state must be saved explicitly + // because it will be overwritten by initial action + bSaveOnHead = !backup_states.empty () && backup_states.front () == head; for (uint32_t i = 0; i < nRules; ++i) { if (saves[i] != ~0u) diff --git a/re2c/src/globals.h b/re2c/src/globals.h index 77053224..771a8a5c 100644 --- a/re2c/src/globals.h +++ b/re2c/src/globals.h @@ -27,7 +27,6 @@ extern bool tFlag; extern bool flag_skeleton; extern bool bNoGenerationDate; -extern bool bUsedYYMarker; extern bool bUsedYYBitmap; extern std::string labelPrefix; diff --git a/re2c/src/main.cc b/re2c/src/main.cc index d14975bf..7d005971 100644 --- a/re2c/src/main.cc +++ b/re2c/src/main.cc @@ -28,7 +28,6 @@ bool flag_skeleton = false; bool bNoGenerationDate = false; bool bUsedYYBitmap = false; -bool bUsedYYMarker = true; bool bEmitYYCh = true; bool bUseStateNext = false; bool bUseYYFill = true; diff --git a/re2c/test/bug60_redundant_yymarker.ci.c b/re2c/test/bug60_redundant_yymarker.ci.c new file mode 100644 index 00000000..65cdb0e5 --- /dev/null +++ b/re2c/test/bug60_redundant_yymarker.ci.c @@ -0,0 +1,69 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + switch (YYGETCONDITION()) { + case yycc1: goto yyc_c1; + case yycc2: goto yyc_c2; + } +/* *********************************** */ +yyc_c1: + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy5; + default: goto yy3; + } +yy3: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; +yy4: + switch (yych) { + case 'a': goto yy7; + default: goto yy3; + } +yy5: + ++YYCURSOR; + yych = *YYCURSOR; + goto yy4; + {} +yy7: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy7; + default: goto yy3; + } + {} +/* *********************************** */ +yyc_c2: + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = *YYCURSOR; + switch (yych) { + case 'd': goto yy13; + default: goto yy12; + } +yy12: + YYCURSOR = YYMARKER; + goto yy14; +yy13: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'd': goto yy15; + default: goto yy14; + } +yy14: + {} +yy15: + yych = *++YYCURSOR; + switch (yych) { + case 'd': goto yy16; + default: goto yy12; + } +yy16: + ++YYCURSOR; + {} +} + diff --git a/re2c/test/bug60_redundant_yymarker.ci.re b/re2c/test/bug60_redundant_yymarker.ci.re new file mode 100644 index 00000000..99e590e4 --- /dev/null +++ b/re2c/test/bug60_redundant_yymarker.ci.re @@ -0,0 +1,9 @@ +/*!re2c + + [^]+ "a" {} + "b" {} + + "d" {} + "ddd" {} + +*/ diff --git a/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c b/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c index 78eca42b..7fdfb961 100644 --- a/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c +++ b/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c @@ -490,7 +490,8 @@ yy4: } yy5: YYDEBUG(5, *YYCURSOR); - yych = *(YYMARKER = ++YYCURSOR); + ++YYCURSOR; + yych = *YYCURSOR; goto yy64; yy6: YYDEBUG(6, *YYCURSOR); @@ -516,7 +517,7 @@ yy9: goto yy8; yy10: YYDEBUG(10, *YYCURSOR); - yych = *(YYMARKER = ++YYCURSOR); + yych = *++YYCURSOR; { static void *yytarget[256] = { &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, &&yy26, @@ -568,7 +569,8 @@ yy13: goto yy26; yy14: YYDEBUG(14, *YYCURSOR); - yych = *(YYMARKER = ++YYCURSOR); + ++YYCURSOR; + yych = *YYCURSOR; goto yy59; YYDEBUG(15, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); @@ -1077,7 +1079,7 @@ yy62: goto yy61; yy63: YYDEBUG(63, *YYCURSOR); - YYMARKER = ++YYCURSOR; + ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; yy64: @@ -1121,7 +1123,7 @@ yy64: } yy65: YYDEBUG(65, *YYCURSOR); - YYMARKER = ++YYCURSOR; + ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; YYDEBUG(66, *YYCURSOR); @@ -2073,7 +2075,7 @@ end_raw_value_chars: } yy137: YYDEBUG(137, *YYCURSOR); - yych = *(YYMARKER = ++YYCURSOR); + yych = *++YYCURSOR; { static void *yytarget[256] = { &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, &&yy136, @@ -2129,7 +2131,7 @@ yy140: goto yy139; yy141: YYDEBUG(141, *YYCURSOR); - yych = *(YYMARKER = ++YYCURSOR); + yych = *++YYCURSOR; goto yy143; yy142: YYDEBUG(142, *YYCURSOR); @@ -2164,7 +2166,7 @@ yy147: goto yy139; yy148: YYDEBUG(148, *YYCURSOR); - YYMARKER = ++YYCURSOR; + ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; yy149: -- 2.40.0