From 67d11d5bf78bfc2340c08e8932e4fc29bf667d4c Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 27 Aug 2015 23:30:03 +0100 Subject: [PATCH] Don't mix up empty code block with nonexistent one in rule actions. Turns out that re2c allows empty code blocks: /*!re2c "a" := */ re2c up to 0.14.3 handled this correctly (generated empty action). Since then this behaviour has been broken: re2c started to autogenerate jump to nonexistent condition. --- re2c/bootstrap/src/parse/parser.cc | 135 ++++++++++++++++++++++------- re2c/src/codegen/emit_action.cc | 24 ++--- re2c/src/codegen/prepare_dfa.cc | 2 +- re2c/src/ir/regexp/regexp_rule.h | 25 ++++-- re2c/src/parse/code.h | 6 -- re2c/src/parse/parser.ypp | 119 +++++++++++++++++++------ re2c/test/empty_code.c.c | 27 ++++++ re2c/test/empty_code.c.re | 5 ++ 8 files changed, 262 insertions(+), 81 deletions(-) create mode 100644 re2c/test/empty_code.c.c create mode 100644 re2c/test/empty_code.c.re diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index 2dfa94bf..f46c7b43 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -136,7 +136,14 @@ void context_none(CondList *clist) in->fatal("no expression specified"); } -void context_rule(CondList *clist, RegExp *expr, RegExp *look, const std::string * newcond, const Code & code) +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) { context_check(clist); const RegExp::InsAccess ins_access = clist->size() > 1 @@ -144,7 +151,15 @@ void context_rule(CondList *clist, RegExp *expr, RegExp *look, const std::string : RegExp::SHARED; for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - RuleOp *rule = new RuleOp(expr, look, code, rank_counter.next (), ins_access, newcond); + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , ins_access + , code + , newcond + ); RegExpMap::iterator itRE = specMap.find(*it); @@ -645,14 +660,14 @@ static const yytype_int8 yyrhs[] = /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 226, 226, 228, 232, 236, 245, 254, 258, 262, - 271, 276, 281, 286, 291, 296, 301, 309, 313, 319, - 323, 327, 333, 337, 343, 356, 361, 369, 374, 378, - 383, 387, 391, 395, 401, 405, 409, 413, 420, 429, - 438, 442, 448, 453, 459, 463, 469, 477, 482, 488, - 494, 504, 516, 522, 530, 533, 540, 546, 556, 559, - 567, 570, 577, 581, 588, 592, 599, 603, 610, 614, - 629, 649, 653, 657, 661, 668, 678, 682 + 0, 241, 241, 243, 247, 251, 260, 269, 273, 277, + 286, 291, 296, 301, 306, 311, 316, 324, 328, 334, + 338, 342, 348, 352, 358, 371, 376, 384, 389, 393, + 398, 402, 406, 410, 416, 420, 424, 428, 435, 452, + 461, 465, 471, 476, 482, 486, 501, 518, 523, 529, + 535, 553, 573, 579, 587, 590, 597, 603, 613, 616, + 624, 627, 634, 638, 645, 649, 656, 660, 667, 671, + 686, 706, 710, 714, 718, 725, 735, 739 }; #endif @@ -1989,7 +2004,15 @@ yyreduce: { in->fatal("condition or '<*>' required when using -c switch"); } - (yyval.regexp) = new RuleOp((yyvsp[(1) - (3)].regexp), (yyvsp[(2) - (3)].regexp), *(yyvsp[(3) - (3)].code), rank_counter.next (), RegExp::SHARED, NULL); + (yyval.regexp) = new RuleOp + ( (yyvsp[(3) - (3)].code)->loc + , (yyvsp[(1) - (3)].regexp) + , (yyvsp[(2) - (3)].regexp) + , rank_counter.next () + , RegExp::SHARED + , (yyvsp[(3) - (3)].code) + , NULL + ); spec = spec? mkAlt(spec, (yyval.regexp)) : (yyval.regexp); ;} break; @@ -2009,7 +2032,7 @@ yyreduce: case 40: { - context_rule((yyvsp[(2) - (7)].clist), (yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), (yyvsp[(6) - (7)].str), *(yyvsp[(7) - (7)].code)); + context_rule ((yyvsp[(2) - (7)].clist), (yyvsp[(7) - (7)].code)->loc, (yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), (yyvsp[(7) - (7)].code), (yyvsp[(6) - (7)].str)); ;} break; @@ -2017,8 +2040,8 @@ yyreduce: { assert((yyvsp[(7) - (7)].str)); - const Code * code = new Code (in->get_fname (), in->get_cline ()); - context_rule((yyvsp[(2) - (7)].clist), (yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), (yyvsp[(7) - (7)].str), *code); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ((yyvsp[(2) - (7)].clist), loc, (yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), NULL, (yyvsp[(7) - (7)].str)); ;} break; @@ -2050,7 +2073,16 @@ yyreduce: { context_check(NULL); - specStar.push_back(new RuleOp((yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), *(yyvsp[(7) - (7)].code), rank_counter.next (), RegExp::PRIVATE, (yyvsp[(6) - (7)].str))); + RuleOp * rule = new RuleOp + ( (yyvsp[(7) - (7)].code)->loc + , (yyvsp[(4) - (7)].regexp) + , (yyvsp[(5) - (7)].regexp) + , rank_counter.next () + , RegExp::PRIVATE + , (yyvsp[(7) - (7)].code) + , (yyvsp[(6) - (7)].str) + ); + specStar.push_back (rule); delete (yyvsp[(6) - (7)].str); ;} break; @@ -2060,8 +2092,17 @@ yyreduce: { assert((yyvsp[(7) - (7)].str)); context_check(NULL); - const Code * code = new Code (in->get_fname (), in->get_cline ()); - specStar.push_back(new RuleOp((yyvsp[(4) - (7)].regexp), (yyvsp[(5) - (7)].regexp), *code, rank_counter.next (), RegExp::PRIVATE, (yyvsp[(7) - (7)].str))); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , (yyvsp[(4) - (7)].regexp) + , (yyvsp[(5) - (7)].regexp) + , rank_counter.next () + , RegExp::PRIVATE + , NULL + , (yyvsp[(7) - (7)].str) + ); + specStar.push_back (rule); delete (yyvsp[(7) - (7)].str); ;} break; @@ -2100,7 +2141,15 @@ yyreduce: { in->fatal("code to handle illegal condition already defined"); } - (yyval.regexp) = specNone = new RuleOp(new NullOp(), new NullOp(), *(yyvsp[(3) - (3)].code), rank_counter.next (), RegExp::SHARED, (yyvsp[(2) - (3)].str)); + (yyval.regexp) = specNone = new RuleOp + ( (yyvsp[(3) - (3)].code)->loc + , new NullOp + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , (yyvsp[(3) - (3)].code) + , (yyvsp[(2) - (3)].str) + ); delete (yyvsp[(2) - (3)].str); ;} break; @@ -2114,8 +2163,16 @@ yyreduce: { in->fatal("code to handle illegal condition already defined"); } - const Code * code = new Code (in->get_fname (), in->get_cline ()); - (yyval.regexp) = specNone = new RuleOp(new NullOp(), new NullOp(), *code, rank_counter.next (), RegExp::SHARED, (yyvsp[(3) - (3)].str)); + Loc loc (in->get_fname (), in->get_cline ()); + (yyval.regexp) = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , NULL + , (yyvsp[(3) - (3)].str) + ); delete (yyvsp[(3) - (3)].str); ;} break; @@ -2643,7 +2700,6 @@ void parse(Scanner& i, Output & o) { RegExpMap::iterator it; SetupMap::const_iterator itRuleSetup; - DefaultMap::const_iterator itRuleDefault; if (parseMode != Scanner::Reuse) { @@ -2702,21 +2758,26 @@ void parse(Scanner& i, Output & o) yySetupRule = ""; } } - itRuleDefault = ruleDefaultMap.find(it->first); - if (itRuleDefault != ruleDefaultMap.end()) + + DefaultMap::const_iterator def = ruleDefaultMap.find (it->first); + if (def == ruleDefaultMap.end ()) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *(itRuleDefault->second), rank_counter.next (), RegExp::SHARED, NULL); - it->second = it->second ? mkAlt(def, it->second) : def; + def = ruleDefaultMap.find ("*"); } - else + if (def != ruleDefaultMap.end ()) { - itRuleDefault = ruleDefaultMap.find("*"); - if (itRuleDefault != ruleDefaultMap.end()) - { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *(itRuleDefault->second), rank_counter.next (), RegExp::SHARED, NULL); - it->second = it->second ? mkAlt(def, it->second) : def; - } + RuleOp * def_rule = new RuleOp + ( def->second->loc + , in->mkDefault () + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , def->second + , NULL + ); + it->second = it->second ? mkAlt (def_rule, it->second) : def_rule; } + dfa_map[it->first] = genCode(it->second, o, topIndent, it->first); } if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) @@ -2729,7 +2790,15 @@ void parse(Scanner& i, Output & o) { if (ruleDefault != NULL && parseMode != Scanner::Reuse) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *ruleDefault, rank_counter.next (), RegExp::SHARED, NULL); + RuleOp * def = new RuleOp + ( ruleDefault->loc + , in->mkDefault () + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , ruleDefault + , NULL + ); spec = spec ? mkAlt(def, spec) : def; } if (spec || !dfa_map.empty()) diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 81eb9147..ea4a9ca0 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -233,7 +233,12 @@ void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleO { if (DFlag) { - o << s->label << " [label=\"" << rule->code.loc.filename << ":" << rule->code.loc.line << "\"]\n"; + o << s->label; + if (rule->code) + { + o << " [label=\"" << rule->code->loc.filename << ":" << rule->code->loc.line << "\"]"; + } + o << "\n"; return; } @@ -255,26 +260,25 @@ void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleO } else { - if (rule->newcond.length() && condName != rule->newcond) + if (!rule->newcond.empty () && condName != rule->newcond) { genSetCondition(o, ind, rule->newcond); } - const bool autogen = rule->code.text.empty (); - if (autogen) - { - o << indent (ind) << replaceParam(condGoto, condGotoParam, condPrefix + rule->newcond) << "\n"; - } - else + if (rule->code) { if (!yySetupRule.empty ()) { o << indent(ind) << yySetupRule << "\n"; } - o.write_line_info (rule->code.loc.line, rule->code.loc.filename.c_str ()); - o << indent (ind) << rule->code.text << "\n"; + o.write_line_info (rule->code->loc.line, rule->code->loc.filename.c_str ()); + o << indent (ind) << rule->code->text << "\n"; o.insert_line_info (); } + else if (!rule->newcond.empty ()) + { + o << indent (ind) << replaceParam(condGoto, condGotoParam, condPrefix + rule->newcond) << "\n"; + } } } diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index 7822d285..cbe2c4f0 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -257,7 +257,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill, const std::string & cond) // warn about not shadowed rule that matches empty string if (empty_rule && !stray_cunits.empty ()) { - warn.match_empty_string (head->rule->code.loc.line); + warn.match_empty_string (head->rule->loc.line); } // split ``base'' states into two parts diff --git a/re2c/src/ir/regexp/regexp_rule.h b/re2c/src/ir/regexp/regexp_rule.h index 53a61089..208b9d88 100644 --- a/re2c/src/ir/regexp/regexp_rule.h +++ b/re2c/src/ir/regexp/regexp_rule.h @@ -12,22 +12,35 @@ namespace re2c class RuleOp: public RegExp { +public: + const Loc loc; + +private: RegExp * exp; public: RegExp * ctx; Ins * ins; rule_rank_t rank; - const Code & code; + const Code * code; const std::string newcond; - inline RuleOp (RegExp * e, RegExp * c, const Code & t, rule_rank_t r, InsAccess access, const std::string * n) - : exp (e) - , ctx (c) + inline RuleOp + ( const Loc & l + , RegExp * r1 + , RegExp * r2 + , rule_rank_t r + , InsAccess access + , const Code * c + , const std::string * cond + ) + : loc (l) + , exp (r1) + , ctx (r2) , ins (NULL) , rank (r) - , code (t) - , newcond (n ? *n : "") + , code (c) + , newcond (cond ? *cond : "") { ins_access = access; } diff --git a/re2c/src/parse/code.h b/re2c/src/parse/code.h index 10d87389..323ba5f9 100644 --- a/re2c/src/parse/code.h +++ b/re2c/src/parse/code.h @@ -21,12 +21,6 @@ struct Code { freelist.insert (this); } - inline Code (const std::string & f, uint32_t l) - : loc (f, l) - , text () - { - freelist.insert (this); - } }; } // namespace re2c diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index a1d04cef..7d96edd9 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -68,7 +68,14 @@ void context_none(CondList *clist) in->fatal("no expression specified"); } -void context_rule(CondList *clist, RegExp *expr, RegExp *look, const std::string * newcond, const Code & code) +void context_rule + ( CondList * clist + , const Loc & loc + , RegExp * expr + , RegExp * look + , const Code * code + , const std::string * newcond + ) { context_check(clist); const RegExp::InsAccess ins_access = clist->size() > 1 @@ -76,7 +83,15 @@ void context_rule(CondList *clist, RegExp *expr, RegExp *look, const std::string : RegExp::SHARED; for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - RuleOp *rule = new RuleOp(expr, look, code, rank_counter.next (), ins_access, newcond); + RuleOp * rule = new RuleOp + ( loc + , expr + , look + , rank_counter.next () + , ins_access + , code + , newcond + ); RegExpMap::iterator itRE = specMap.find(*it); @@ -423,7 +438,15 @@ rule: { in->fatal("condition or '<*>' required when using -c switch"); } - $$ = new RuleOp($1, $2, *$3, rank_counter.next (), RegExp::SHARED, NULL); + $$ = new RuleOp + ( $3->loc + , $1 + , $2 + , rank_counter.next () + , RegExp::SHARED + , $3 + , NULL + ); spec = spec? mkAlt(spec, $$) : $$; } | STAR CODE /* default rule */ @@ -437,13 +460,13 @@ rule: } | '<' cond '>' expr look newcond CODE { - context_rule($2, $4, $5, $6, *$7); + context_rule ($2, $7->loc, $4, $5, $7, $6); } | '<' cond '>' expr look ':' newcond { assert($7); - const Code * code = new Code (in->get_fname (), in->get_cline ()); - context_rule($2, $4, $5, $7, *code); + Loc loc (in->get_fname (), in->get_cline ()); + context_rule ($2, loc, $4, $5, NULL, $7); } | '<' cond '>' look newcond CODE { @@ -463,15 +486,33 @@ rule: | '<' STAR '>' expr look newcond CODE { context_check(NULL); - specStar.push_back(new RuleOp($4, $5, *$7, rank_counter.next (), RegExp::PRIVATE, $6)); + RuleOp * rule = new RuleOp + ( $7->loc + , $4 + , $5 + , rank_counter.next () + , RegExp::PRIVATE + , $7 + , $6 + ); + specStar.push_back (rule); delete $6; } | '<' STAR '>' expr look ':' newcond { assert($7); context_check(NULL); - const Code * code = new Code (in->get_fname (), in->get_cline ()); - specStar.push_back(new RuleOp($4, $5, *code, rank_counter.next (), RegExp::PRIVATE, $7)); + Loc loc (in->get_fname (), in->get_cline ()); + RuleOp * rule = new RuleOp + ( loc + , $4 + , $5 + , rank_counter.next () + , RegExp::PRIVATE + , NULL + , $7 + ); + specStar.push_back (rule); delete $7; } | '<' STAR '>' look newcond CODE @@ -498,7 +539,15 @@ rule: { in->fatal("code to handle illegal condition already defined"); } - $$ = specNone = new RuleOp(new NullOp(), new NullOp(), *$3, rank_counter.next (), RegExp::SHARED, $2); + $$ = specNone = new RuleOp + ( $3->loc + , new NullOp + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , $3 + , $2 + ); delete $2; } | NOCOND ':' newcond @@ -509,8 +558,16 @@ rule: { in->fatal("code to handle illegal condition already defined"); } - const Code * code = new Code (in->get_fname (), in->get_cline ()); - $$ = specNone = new RuleOp(new NullOp(), new NullOp(), *code, rank_counter.next (), RegExp::SHARED, $3); + Loc loc (in->get_fname (), in->get_cline ()); + $$ = specNone = new RuleOp + ( loc + , new NullOp + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , NULL + , $3 + ); delete $3; } | SETUP STAR '>' CODE @@ -778,7 +835,6 @@ void parse(Scanner& i, Output & o) { RegExpMap::iterator it; SetupMap::const_iterator itRuleSetup; - DefaultMap::const_iterator itRuleDefault; if (parseMode != Scanner::Reuse) { @@ -837,21 +893,26 @@ void parse(Scanner& i, Output & o) yySetupRule = ""; } } - itRuleDefault = ruleDefaultMap.find(it->first); - if (itRuleDefault != ruleDefaultMap.end()) + + DefaultMap::const_iterator def = ruleDefaultMap.find (it->first); + if (def == ruleDefaultMap.end ()) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *(itRuleDefault->second), rank_counter.next (), RegExp::SHARED, NULL); - it->second = it->second ? mkAlt(def, it->second) : def; + def = ruleDefaultMap.find ("*"); } - else + if (def != ruleDefaultMap.end ()) { - itRuleDefault = ruleDefaultMap.find("*"); - if (itRuleDefault != ruleDefaultMap.end()) - { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *(itRuleDefault->second), rank_counter.next (), RegExp::SHARED, NULL); - it->second = it->second ? mkAlt(def, it->second) : def; - } + RuleOp * def_rule = new RuleOp + ( def->second->loc + , in->mkDefault () + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , def->second + , NULL + ); + it->second = it->second ? mkAlt (def_rule, it->second) : def_rule; } + dfa_map[it->first] = genCode(it->second, o, topIndent, it->first); } if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) @@ -864,7 +925,15 @@ void parse(Scanner& i, Output & o) { if (ruleDefault != NULL && parseMode != Scanner::Reuse) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), *ruleDefault, rank_counter.next (), RegExp::SHARED, NULL); + RuleOp * def = new RuleOp + ( ruleDefault->loc + , in->mkDefault () + , new NullOp + , rank_counter.next () + , RegExp::SHARED + , ruleDefault + , NULL + ); spec = spec ? mkAlt(def, spec) : def; } if (spec || !dfa_map.empty()) diff --git a/re2c/test/empty_code.c.c b/re2c/test/empty_code.c.c new file mode 100644 index 00000000..a59ac5aa --- /dev/null +++ b/re2c/test/empty_code.c.c @@ -0,0 +1,27 @@ +re2c: warning: line 3: naked default case in condition a (stray code units: [0x00-`][b-0xFF]), better add default rule * [-Wnaked-default] +/* Generated by re2c */ +#line 1 "empty_code.c.re" + +#line 5 "" +{ + YYCTYPE yych; + switch (YYGETCONDITION()) { + case yyca: goto yyc_a; + } +/* *********************************** */ +yyc_a: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy4; + default: goto yy3; + } +yy3: +yy4: + ++YYCURSOR; +#line 3 "empty_code.c.re" + +#line 24 "" +} +#line 3 "empty_code.c.re" + diff --git a/re2c/test/empty_code.c.re b/re2c/test/empty_code.c.re new file mode 100644 index 00000000..e4b35726 --- /dev/null +++ b/re2c/test/empty_code.c.re @@ -0,0 +1,5 @@ +/*!re2c + + "a" := + +*/ -- 2.40.0