From ebadef2015bdbd4731ad58aecfb93b57109c757d Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 4 Jun 2015 22:25:03 +0100 Subject: [PATCH] Fixed bug #59 "bogus 'yyaccept' in '-c' mode". We have one 'yyaccept' initialization per re2c block. Each block consists of one or more DFAs (multiple DFAs in '-c' mode in case of multiple conditions). Each DFA may or may not use 'yyaccept' (that is, save 'yyaccept' in some states and have a dispatch state based on the saved 'yyaccept' value). Description of the bug: in '-c' mode, sometimes a DFA would have states that save 'yyaccept', but no dispatch state that uses the saved value. Such a DFA didn't actually need 'yyaccept' (all the assignments vanished if other conditions that need 'yyaccept' were removed). The essence of the bug: re2c decided whether to output 'yyaccept'-related code on a per-block basis: for multiple conditions in the same block, the same decision was made (if any condition needed 'yyaccept', all of them would output it). The fix: 'yyaccept' initialization should be done on a per-block basis, while assignments to 'yyaccept' should be done on a per-DFA basis. Also, 'yyaccept' initialization must be delayed, while assignments to 'yyaccept' must not. Note: we may consider per-DFA 'yyaccept' initialization (have a local 'yyaccept' variable per DFA). This wouldn't conflict with the '-f' switch (as it might seem) as long as we name all the variables 'yyaccept' and don't generate any 'yyaccept' initializations with '-f'. 
--- re2c/src/codegen/emit.h | 1 + re2c/src/codegen/emit_action.cc | 14 +-- re2c/src/codegen/emit_dfa.cc | 3 +- re2c/src/codegen/output.cc | 20 ----- re2c/src/codegen/output.h | 9 -- re2c/src/codegen/prepare_dfa.cc | 6 +- re2c/src/dfa/actions.cc | 2 +- re2c/src/dfa/dfa.h | 2 +- re2c/test/bug59_bogus_yyaccept.ci.c | 87 +++++++++++++++++++ re2c/test/bug59_bogus_yyaccept.ci.re | 10 +++ .../php20140822_zend_language_scanner.igcFd.c | 5 -- re2c/test/php20150211_phpdbg_lexer.igcFd.c | 5 -- ...11_zend_ini_scanner.igcFd--case-inverted.c | 8 -- ...nd_language_scanner.igcFd--case-inverted.c | 6 -- 14 files changed, 115 insertions(+), 63 deletions(-) create mode 100644 re2c/test/bug59_bogus_yyaccept.ci.c create mode 100644 re2c/test/bug59_bogus_yyaccept.ci.re diff --git a/re2c/src/codegen/emit.h b/re2c/src/codegen/emit.h index f7860c49..96ea5439 100644 --- a/re2c/src/codegen/emit.h +++ b/re2c/src/codegen/emit.h @@ -17,6 +17,7 @@ void emit_action , const State * const s , const std::string & condName , const std::set & used_labels + , bool save_yyaccept ); // helpers diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index f6b84f89..fb393d65 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -10,7 +10,7 @@ namespace re2c static void need (OutputFile & o, uint32_t ind, bool & readCh, uint32_t n, bool bSetMarker); static void emit_match (OutputFile & o, uint32_t ind, bool & readCh, const State * const s); static void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const Initial & init, const std::set & used_labels); -static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save); +static void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept); static void emit_accept_binary (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept, uint32_t l, 
uint32_t r); static void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, const accept_t & accept); static void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleOp * const rule, const std::string & condName); @@ -25,6 +25,7 @@ void emit_action , const State * const s , const std::string & condName , const std::set & used_labels + , bool save_yyaccept ) { switch (action.type) @@ -36,7 +37,7 @@ void emit_action emit_initial (o, ind, readCh, s, * action.info.initial, used_labels); break; case Action::SAVE: - emit_save (o, ind, readCh, s, action.info.save); + emit_save (o, ind, readCh, s, action.info.save, save_yyaccept); break; case Action::MOVE: break; @@ -124,14 +125,17 @@ void emit_initial (OutputFile & o, uint32_t ind, bool & readCh, const State * co } } -void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save) +void emit_save (OutputFile & o, uint32_t ind, bool & readCh, const State * const s, uint32_t save, bool save_yyaccept) { if (DFlag) { return; } - o.insert_yyaccept_selector (ind, save); + if (save_yyaccept) + { + o << indent (ind) << mapCodeName["yyaccept"] << " = " << save << ";\n"; + } if (s->link) { @@ -190,8 +194,6 @@ void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * con if (accept.size() > 1) { - o.set_used_yyaccept (); - if (gFlag && accept.size() >= cGotoThreshold) { o << indent(ind++) << "{\n"; diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index b1582f96..9ffdea61 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -204,11 +204,12 @@ void DFA::emit(Output & output, uint32_t& ind, const RegExpMap* specMap, const s } // Generate code + const bool save_yyaccept = accept_map.size () > 1; for (State * s = head; s; s = s->next) { bool readCh = false; emit_state (o, ind, s, used_labels.count (s->label)); - emit_action (s->action, o, ind, readCh, s, condName, used_labels); + 
emit_action (s->action, o, ind, readCh, s, condName, used_labels, save_yyaccept); s->go.emit(o, ind, readCh); } diff --git a/re2c/src/codegen/output.cc b/re2c/src/codegen/output.cc index c6f322e3..d5155d4e 100644 --- a/re2c/src/codegen/output.cc +++ b/re2c/src/codegen/output.cc @@ -14,7 +14,6 @@ namespace re2c OutputFragment::OutputFragment (type_t t, uint32_t i) : type (t) , stream () - , info () , indent (i) {} @@ -190,14 +189,6 @@ void OutputFile::insert_yyaccept_init (uint32_t ind) insert_code (); } -void OutputFile::insert_yyaccept_selector (uint32_t ind, uint32_t selector) -{ - OutputFragment * p = new OutputFragment (OutputFragment::YYACCEPT_SELECTOR, ind); - p->info.yyaccept_selector = selector; - blocks.back ()->fragments.push_back (p); - insert_code (); -} - void OutputFile::insert_yymaxfill () { blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::YYMAXFILL, 0)); @@ -265,9 +256,6 @@ void OutputFile::emit case OutputFragment::YYACCEPT_INIT: output_yyaccept_init (f.stream, f.indent, b.used_yyaccept); break; - case OutputFragment::YYACCEPT_SELECTOR: - output_yyaccept_selector (f.stream, f.indent, b.used_yyaccept, f.info.yyaccept_selector); - break; case OutputFragment::YYMAXFILL: output_yymaxfill (f.stream, max_fill); break; @@ -446,14 +434,6 @@ void output_yyaccept_init (std::ostream & o, uint32_t ind, bool used_yyaccept) } } -void output_yyaccept_selector (std::ostream & o, uint32_t ind, bool used_yyaccept, uint32_t yyaccept_selector) -{ - if (used_yyaccept) - { - o << indent (ind) << mapCodeName["yyaccept"] << " = " << yyaccept_selector << ";\n"; - } -} - void output_yymaxfill (std::ostream & o, uint32_t max_fill) { o << "#define YYMAXFILL " << max_fill << "\n"; diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h index 262a2ca9..c6800ded 100644 --- a/re2c/src/codegen/output.h +++ b/re2c/src/codegen/output.h @@ -22,18 +22,11 @@ struct OutputFragment , STATE_GOTO , TYPES , YYACCEPT_INIT - , YYACCEPT_SELECTOR , YYMAXFILL 
}; - union info_t - { - uint32_t yyaccept_selector; - }; - type_t type; std::ostringstream stream; - info_t info; uint32_t indent; OutputFragment (type_t t, uint32_t i); @@ -90,7 +83,6 @@ public: void insert_state_goto (uint32_t ind); void insert_types (); void insert_yyaccept_init (uint32_t ind); - void insert_yyaccept_selector (uint32_t ind, uint32_t selector); void insert_yymaxfill (); void set_used_yyaccept (); @@ -146,7 +138,6 @@ void output_state_goto (std::ostream &, uint32_t, uint32_t); void output_types (std::ostream &, uint32_t, const std::vector &); void output_version_time (std::ostream &); void output_yyaccept_init (std::ostream &, uint32_t, bool); -void output_yyaccept_selector (std::ostream &, uint32_t, bool, uint32_t); void output_yymaxfill (std::ostream &, uint32_t); // helpers diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index bd1113f6..49456c5c 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -153,7 +153,7 @@ void DFA::findBaseState() operator delete (span); } -void DFA::prepare(uint32_t & max_fill) +void DFA::prepare(OutputFile & o, uint32_t & max_fill) { bUsedYYBitmap = false; @@ -254,6 +254,10 @@ void DFA::prepare(uint32_t & max_fill) accept_map[saves[i]] = rules[i]; } } + if (accept_map.size () > 1) + { + o.set_used_yyaccept (); + } accfixup->action.set_accept (&accept_map); } delete [] saves; diff --git a/re2c/src/dfa/actions.cc b/re2c/src/dfa/actions.cc index 0bc9ce54..80b05104 100644 --- a/re2c/src/dfa/actions.cc +++ b/re2c/src/dfa/actions.cc @@ -1064,7 +1064,7 @@ smart_ptr genCode(RegExp *re, Output & output, uint32_t ind) skeleton.emit_data (output.data); skeleton::emit_prolog (output.source, ind, output.data.file_name.c_str ()); } - dfa->prepare (output.max_fill); + dfa->prepare (output.source, output.max_fill); return dfa; } diff --git a/re2c/src/dfa/dfa.h b/re2c/src/dfa/dfa.h index 46ec519f..d85b8b2b 100644 --- a/re2c/src/dfa/dfa.h +++ b/re2c/src/dfa/dfa.h @@ -36,7 
+36,7 @@ public: void findSCCs (); void findBaseState (); - void prepare (uint32_t &); + void prepare (OutputFile & o, uint32_t &); void count_used_labels (std::set & used, label_t prolog, label_t start, bool force_start) const; void emit (Output &, uint32_t &, const RegExpMap *, const std::string &, bool, bool &); diff --git a/re2c/test/bug59_bogus_yyaccept.ci.c b/re2c/test/bug59_bogus_yyaccept.ci.c new file mode 100644 index 00000000..faf7248c --- /dev/null +++ b/re2c/test/bug59_bogus_yyaccept.ci.c @@ -0,0 +1,87 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + switch (YYGETCONDITION()) { + case yycc1: goto yyc_c1; + case yycc2: goto yyc_c2; + } +/* *********************************** */ +yyc_c1: + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy4; + default: goto yy3; + } +yy3: + YYCURSOR = YYMARKER; + goto yy5; +yy4: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'b': goto yy6; + default: goto yy5; + } +yy5: + {} +yy6: + yych = *++YYCURSOR; + switch (yych) { + case 'b': goto yy7; + default: goto yy3; + } +yy7: + ++YYCURSOR; + {} +/* *********************************** */ +yyc_c2: + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy12; + default: goto yy11; + } +yy11: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy13; + } else { + goto yy16; + } +yy12: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'a': goto yy14; + default: goto yy13; + } +yy13: + {} +yy14: + yych = *++YYCURSOR; + switch (yych) { + case 'a': goto yy15; + default: goto yy11; + } +yy15: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'a': goto yy17; + default: goto yy16; + } +yy16: + {} +yy17: + yych = *++YYCURSOR; + switch (yych) { + case 'a': goto yy18; + default: goto yy11; + } +yy18: + ++YYCURSOR; + {} +} + diff --git a/re2c/test/bug59_bogus_yyaccept.ci.re 
b/re2c/test/bug59_bogus_yyaccept.ci.re new file mode 100644 index 00000000..53b1343f --- /dev/null +++ b/re2c/test/bug59_bogus_yyaccept.ci.re @@ -0,0 +1,10 @@ +/*!re2c + + "b" {} + "bbb" {} + + "a" {} + "aaa" {} + "aaaaa" {} + +*/ diff --git a/re2c/test/php20140822_zend_language_scanner.igcFd.c b/re2c/test/php20140822_zend_language_scanner.igcFd.c index b21de193..cd1d341c 100644 --- a/re2c/test/php20140822_zend_language_scanner.igcFd.c +++ b/re2c/test/php20140822_zend_language_scanner.igcFd.c @@ -1648,7 +1648,6 @@ yy62: } yy64: YYDEBUG(64, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -1891,7 +1890,6 @@ yy84: } yy86: YYDEBUG(86, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -2165,7 +2163,6 @@ yy108: } yy110: YYDEBUG(110, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -6707,7 +6704,6 @@ yyc_ST_LOOKING_FOR_VARNAME: } yy819: YYDEBUG(819, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); { static void *yytarget[256] = { @@ -6934,7 +6930,6 @@ yyc_ST_VAR_OFFSET: } yy833: YYDEBUG(833, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yych <= 'a') { if (yych <= '/') goto yy834; diff --git a/re2c/test/php20150211_phpdbg_lexer.igcFd.c b/re2c/test/php20150211_phpdbg_lexer.igcFd.c index 898aebaf..6138f1d8 100644 --- a/re2c/test/php20150211_phpdbg_lexer.igcFd.c +++ b/re2c/test/php20150211_phpdbg_lexer.igcFd.c @@ -174,21 +174,18 @@ yy8: goto yy4; yy9: YYDEBUG(9, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yych == 'V') goto yy24; if (yych == 'v') goto yy24; goto yy4; yy10: YYDEBUG(10, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yych == 'H') goto yy20; if (yych == 'h') goto yy20; goto yy4; yy11: YYDEBUG(11, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yybm[0+yych] & 64) { goto yy17; @@ -198,7 +195,6 @@ yy11: goto yy4; yy12: YYDEBUG(12, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = 
++YYCURSOR); if (yych == 'U') goto yy14; if (yych == 'u') goto yy14; @@ -1367,7 +1363,6 @@ yy131: goto yy127; yy132: YYDEBUG(132, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yych == 'r') goto yy134; goto yy127; diff --git a/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c b/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c index 430637a3..78eca42b 100644 --- a/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c +++ b/re2c/test/php20150211_zend_ini_scanner.igcFd--case-inverted.c @@ -490,7 +490,6 @@ yy4: } yy5: YYDEBUG(5, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); goto yy64; yy6: @@ -517,7 +516,6 @@ yy9: goto yy8; yy10: YYDEBUG(10, *YYCURSOR); - yyaccept = 1; yych = *(YYMARKER = ++YYCURSOR); { static void *yytarget[256] = { @@ -570,7 +568,6 @@ yy13: goto yy26; yy14: YYDEBUG(14, *YYCURSOR); - yyaccept = 2; yych = *(YYMARKER = ++YYCURSOR); goto yy59; YYDEBUG(15, *YYCURSOR); @@ -1080,7 +1077,6 @@ yy62: goto yy61; yy63: YYDEBUG(63, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; @@ -1125,7 +1121,6 @@ yy64: } yy65: YYDEBUG(65, *YYCURSOR); - yyaccept = 1; YYMARKER = ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; @@ -2078,7 +2073,6 @@ end_raw_value_chars: } yy137: YYDEBUG(137, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); { static void *yytarget[256] = { @@ -2135,7 +2129,6 @@ yy140: goto yy139; yy141: YYDEBUG(141, *YYCURSOR); - yyaccept = 1; yych = *(YYMARKER = ++YYCURSOR); goto yy143; yy142: @@ -2171,7 +2164,6 @@ yy147: goto yy139; yy148: YYDEBUG(148, *YYCURSOR); - yyaccept = 2; YYMARKER = ++YYCURSOR; YYFILL(2); yych = *YYCURSOR; diff --git a/re2c/test/php20150211_zend_language_scanner.igcFd--case-inverted.c b/re2c/test/php20150211_zend_language_scanner.igcFd--case-inverted.c index f457f78b..913cfef3 100644 --- a/re2c/test/php20150211_zend_language_scanner.igcFd--case-inverted.c +++ b/re2c/test/php20150211_zend_language_scanner.igcFd--case-inverted.c @@ 
-1162,7 +1162,6 @@ yy5: goto yy4; yy6: YYDEBUG(6, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); if (yych <= 'O') { if (yych == '=') goto yy8; @@ -1390,7 +1389,6 @@ yy25: } yy27: YYDEBUG(27, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -1630,7 +1628,6 @@ yy47: } yy49: YYDEBUG(49, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -1901,7 +1898,6 @@ yy71: } yy73: YYDEBUG(73, *YYCURSOR); - yyaccept = 0; YYMARKER = ++YYCURSOR; YYFILL(3); yych = *YYCURSOR; @@ -7550,7 +7546,6 @@ yyc_ST_LOOKING_FOR_VARNAME: } yy778: YYDEBUG(778, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); { static void *yytarget[256] = { @@ -7775,7 +7770,6 @@ yyc_ST_VAR_OFFSET: } yy792: YYDEBUG(792, *YYCURSOR); - yyaccept = 0; yych = *(YYMARKER = ++YYCURSOR); { static void *yytarget[256] = { -- 2.40.0