From f781c3a3a07cde0243c894da640070d6c62ef4bc Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 22 Feb 2016 15:47:44 +0000 Subject: [PATCH] Code cleanup: moved rule construction to a separate file. --- re2c/Makefile.am | 1 + re2c/bootstrap/src/parse/lex.cc | 2 +- re2c/bootstrap/src/parse/parser.cc | 32 +++++++---- re2c/src/ir/regexp/make_rule.cc | 91 ++++++++++++++++++++++++++++++ re2c/src/ir/regexp/regexp.cc | 88 ----------------------------- re2c/src/ir/regexp/regexp.h | 11 +++- re2c/src/parse/parser.h | 3 + re2c/src/parse/parser.ypp | 32 +++++++---- 8 files changed, 146 insertions(+), 114 deletions(-) create mode 100644 re2c/src/ir/regexp/make_rule.cc diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 311c885e..71cfb4bc 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -93,6 +93,7 @@ SRC = \ src/ir/regexp/encoding/utf16/utf16_regexp.cc \ src/ir/regexp/encoding/utf16/utf16.cc \ src/ir/regexp/encoding/utf16/utf16_range.cc \ + src/ir/regexp/make_rule.cc \ src/ir/regexp/regexp.cc \ src/ir/regexp/split_charset.cc \ src/ir/compile.cc \ diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index ef098f89..13fd0ccc 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Mon Feb 22 13:12:30 2016 */ +/* Generated by re2c 0.16 on Mon Feb 22 15:20:41 2016 */ #line 1 "../src/parse/lex.re" #include "src/util/c99_stdint.h" #include diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index 9307c37b..4a458d6d 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -165,7 +165,7 @@ void context_rule condnames.push_back (*it); } - const RegExp *rule = RegExp::rule + const RegExp *rule = make_rule ( loc , expr , look @@ -202,7 +202,7 @@ void default_rule(CondList *clist, const Code * code) context_check(clist); for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - const RegExp * def = RegExp::rule + const RegExp * def = make_rule ( code->loc , in->mkDefault () , RegExp::nil() @@ -1619,7 +1619,7 @@ yyreduce: { in->fatal("condition or '<*>' required when using -c switch"); } - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( (yyvsp[(3) - (3)].code)->loc , (yyvsp[(1) - (3)].regexp) , (yyvsp[(2) - (3)].regexp) @@ -1636,7 +1636,7 @@ yyreduce: { if (opts->cFlag) in->fatal("condition or '<*>' required when using -c switch"); - const RegExp * def = RegExp::rule + const RegExp * def = make_rule ( (yyvsp[(2) - (2)].code)->loc , in->mkDefault () , RegExp::nil() @@ -1695,7 +1695,7 @@ yyreduce: { context_check(NULL); - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( (yyvsp[(7) - (7)].code)->loc , (yyvsp[(4) - (7)].regexp) , (yyvsp[(5) - (7)].regexp) @@ -1714,7 +1714,7 @@ yyreduce: assert((yyvsp[(7) - (7)].str)); context_check(NULL); Loc loc (in->get_fname (), in->get_cline ()); - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( loc , (yyvsp[(4) - (7)].regexp) , (yyvsp[(5) - (7)].regexp) @@ -1751,7 +1751,7 @@ yyreduce: { in->fatal ("code to default rule '*' is already defined"); } - star_default = RegExp::rule + star_default = make_rule ( (yyvsp[(5) - (5)].code)->loc , in->mkDefault () , RegExp::nil() @@ -1770,7 +1770,7 @@ yyreduce: { in->fatal("code to handle illegal condition already defined"); } - (yyval.regexp) = specNone = RegExp::rule + (yyval.regexp) = specNone = make_rule ( (yyvsp[(3) - (3)].code)->loc , RegExp::nil() , RegExp::nil() @@ -1792,7 +1792,7 @@ yyreduce: in->fatal("code to handle illegal condition already defined"); } Loc loc (in->get_fname (), in->get_cline ()); - (yyval.regexp) = specNone = RegExp::rule + (yyval.regexp) = specNone = make_rule ( loc , RegExp::nil() , RegExp::nil() @@ -2337,8 +2337,18 @@ void parse(Scanner& i, Output & o) { for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) { - const RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next()); - it->second.add (r); + // Different condition share rule regexps (regexps are immutable anyway), + // but must have an individual copy of rule info (rule reachability + // and shadow set are not the same for different conditions). + // Also must update rule rank to guarantee lowest priority. + const RegExp *re = *itOp; + const RuleInfo *info = re->pld.rule.info; + const RegExp *re_copy = RegExp::rule( + re->pld.rule.re, + re->pld.rule.ctx, + new RuleInfo(info->loc, rank_counter.next(), info->code, + &info->newcond, info->ctx_len, info->nullable)); + it->second.add(re_copy); } if (star_default) { diff --git a/re2c/src/ir/regexp/make_rule.cc b/re2c/src/ir/regexp/make_rule.cc new file mode 100644 index 00000000..b4087908 --- /dev/null +++ b/re2c/src/ir/regexp/make_rule.cc @@ -0,0 +1,91 @@ +#include "src/conf/opt.h" +#include "src/globals.h" +#include "src/ir/regexp/regexp.h" + +namespace re2c +{ + +static uint32_t fixlen(const RegExp *re); +static bool nullable(const RegExp *re); + +const RegExp *make_rule(const Loc &loc, const RegExp *r1, const RegExp *r2, + rule_rank_t rank, const Code *code, const std::string *newcond) +{ + uint32_t ctx_len = fixlen(r2); + // cannot emulate 'YYCURSOR -= N' operation with generic API + if (ctx_len != 0 + && opts->input_api.type() == InputAPI::CUSTOM) { + ctx_len = ~0u; + } + RuleInfo *info = new RuleInfo(loc, rank, code, + newcond, ctx_len, nullable(r1)); + return RegExp::rule(r1, r2, info); +} + +// Shallow-copy rule's regexps, but deep-copy rule's info. +// Used to duplicate <*> rules in conditions: different conditions +// share regexps (regexps are immutable anyway), but must have an +// individual copy of rule info (rule reachability and shadow set +// are different). Also update rule rank. +const RegExp *make_rule_copy(const RegExp *re, rule_rank_t rank) +{ + const RuleInfo *info = re->pld.rule.info; + RuleInfo *info_copy = new RuleInfo(info->loc, rank, info->code, + &info->newcond, info->ctx_len, info->nullable); + return RegExp::rule(re->pld.rule.re, re->pld.rule.ctx, info_copy); +} + +uint32_t fixlen(const RegExp *re) +{ + switch (re->tag) { + case RegExp::NIL: + return 0; + case RegExp::SYM: + return 1; + case RegExp::ALT: + { + const uint32_t l1 = fixlen(re->pld.alt.re1); + const uint32_t l2 = fixlen(re->pld.alt.re2); + return l1 == l2 ? l1 : ~0u; + } + case RegExp::CAT: + { + const uint32_t l1 = fixlen(re->pld.cat.re1); + if (l1 == ~0u) { + return ~0u; + } + const uint32_t l2 = fixlen(re->pld.cat.re2); + if (l2 == ~0u) { + return ~0u; + } + return l1 + l2; + } + case RegExp::ITER: + case RegExp::RULE: + default: + return ~0u; + } +} + +bool nullable(const RegExp *re) +{ + switch (re->tag) { + default: + case RegExp::NIL: + return true; + case RegExp::SYM: + return false; + case RegExp::ALT: + return nullable(re->pld.alt.re1) + || nullable(re->pld.alt.re2); + case RegExp::CAT: + return nullable(re->pld.cat.re1) + && nullable(re->pld.cat.re2); + case RegExp::ITER: + return true; + case RegExp::RULE: + return nullable(re->pld.rule.re); + } +} + +} // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index d3a44154..1f92895a 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -15,9 +15,6 @@ namespace re2c { -static uint32_t fixlen(const RegExp *re); -static bool nullable(const RegExp *re); - free_list RegExp::flist; const RegExp *doAlt(const RegExp *re1, const RegExp *re2) @@ -211,89 +208,4 @@ const RegExp *repeat_from(const RegExp *re, uint32_t n) RegExp::iter(re)); } -const RegExp* RegExp::rule(const Loc &loc, const RegExp *r1, const RegExp *r2, - rule_rank_t rank, const Code *code, const std::string *newcond) -{ - RegExp *re = new RegExp(RULE); - re->pld.rule.re = r1; - re->pld.rule.ctx = r2; - - uint32_t ctx_len = fixlen(r2); - // cannot emulate 'YYCURSOR -= N' operation with generic API - if (ctx_len != 0 - && opts->input_api.type() == InputAPI::CUSTOM) - { - ctx_len = ~0u; - } - - re->pld.rule.info = new RuleInfo(loc, rank, code, newcond, ctx_len, nullable(r1)); - return re; -} - -// shallow-copies regexps, but deep-copies rule info -// used to duplicate <*> rules in conditions -const RegExp* RegExp::rule_copy(const RegExp *rule, rule_rank_t rank) -{ - RegExp *re = new RegExp(RULE); - re->pld.rule.re = rule->pld.rule.re; - re->pld.rule.ctx = rule->pld.rule.ctx; - const RuleInfo *info = rule->pld.rule.info; - re->pld.rule.info = new RuleInfo(info->loc, rank, info->code, - &info->newcond, info->ctx_len, info->nullable); - return re; -} - -uint32_t fixlen(const RegExp *re) -{ - switch (re->tag) { - case RegExp::NIL: - return 0; - case RegExp::SYM: - return 1; - case RegExp::ALT: - { - const uint32_t l1 = fixlen(re->pld.alt.re1); - const uint32_t l2 = fixlen(re->pld.alt.re2); - return l1 == l2 ? l1 : ~0u; - } - case RegExp::CAT: - { - const uint32_t l1 = fixlen(re->pld.cat.re1); - if (l1 == ~0u) { - return ~0u; - } - const uint32_t l2 = fixlen(re->pld.cat.re2); - if (l2 == ~0u) { - return ~0u; - } - return l1 + l2; - } - case RegExp::ITER: - case RegExp::RULE: - default: - return ~0u; - } -} - -bool nullable(const RegExp *re) -{ - switch (re->tag) { - default: - case RegExp::NIL: - return true; - case RegExp::SYM: - return false; - case RegExp::ALT: - return nullable(re->pld.alt.re1) - || nullable(re->pld.alt.re2); - case RegExp::CAT: - return nullable(re->pld.cat.re1) - && nullable(re->pld.cat.re2); - case RegExp::ITER: - return true; - case RegExp::RULE: - return nullable(re->pld.rule.re); - } -} - } // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h index 9e1a3fee..542aa729 100644 --- a/re2c/src/ir/regexp/regexp.h +++ b/re2c/src/ir/regexp/regexp.h @@ -91,9 +91,14 @@ struct RegExp re->pld.iter.re = r; return re; } - static const RegExp *rule(const Loc &loc, const RegExp *r1, const RegExp *r2, - rule_rank_t rank, const Code *code, const std::string *newcond); - static const RegExp *rule_copy(const RegExp *rule, rule_rank_t rank); + static const RegExp *rule(const RegExp *r1, const RegExp *r2, RuleInfo *i) + { + RegExp *re = new RegExp(RULE); + re->pld.rule.re = r1; + re->pld.rule.ctx = r2; + re->pld.rule.info = i; + return re; + } inline ~RegExp() { if (tag == RULE) { diff --git a/re2c/src/parse/parser.h b/re2c/src/parse/parser.h index 69aeacf8..f48bd568 100644 --- a/re2c/src/parse/parser.h +++ b/re2c/src/parse/parser.h @@ -15,6 +15,9 @@ namespace re2c extern void parse(Scanner &, Output &); extern void parse_cleanup(); +const RegExp *make_rule(const Loc &loc, const RegExp *r1, const RegExp *r2, + rule_rank_t rank, const Code *code, const std::string *newcond); +const RegExp *make_rule_copy(const RegExp *re, rule_rank_t rank); typedef std::set CondList; typedef std::list RuleList; diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index cfa8d593..f31d61fd 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -97,7 +97,7 @@ void context_rule condnames.push_back (*it); } - const RegExp *rule = RegExp::rule + const RegExp *rule = make_rule ( loc , expr , look @@ -134,7 +134,7 @@ void default_rule(CondList *clist, const Code * code) context_check(clist); for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { - const RegExp * def = RegExp::rule + const RegExp * def = make_rule ( code->loc , in->mkDefault () , RegExp::nil() @@ -230,7 +230,7 @@ rule: { in->fatal("condition or '<*>' required when using -c switch"); } - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( $3->loc , $1 , $2 @@ -244,7 +244,7 @@ rule: { if (opts->cFlag) in->fatal("condition or '<*>' required when using -c switch"); - const RegExp * def = RegExp::rule + const RegExp * def = make_rule ( $2->loc , in->mkDefault () , RegExp::nil() @@ -285,7 +285,7 @@ rule: | '<' TOKEN_STAR '>' expr look newcond TOKEN_CODE { context_check(NULL); - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( $7->loc , $4 , $5 @@ -301,7 +301,7 @@ rule: assert($7); context_check(NULL); Loc loc (in->get_fname (), in->get_cline ()); - const RegExp * rule = RegExp::rule + const RegExp * rule = make_rule ( loc , $4 , $5 @@ -329,7 +329,7 @@ rule: { in->fatal ("code to default rule '*' is already defined"); } - star_default = RegExp::rule + star_default = make_rule ( $5->loc , in->mkDefault () , RegExp::nil() @@ -345,7 +345,7 @@ rule: { in->fatal("code to handle illegal condition already defined"); } - $$ = specNone = RegExp::rule + $$ = specNone = make_rule ( $3->loc , RegExp::nil() , RegExp::nil() @@ -364,7 +364,7 @@ rule: in->fatal("code to handle illegal condition already defined"); } Loc loc (in->get_fname (), in->get_cline ()); - $$ = specNone = RegExp::rule + $$ = specNone = make_rule ( loc , RegExp::nil() , RegExp::nil() @@ -649,8 +649,18 @@ void parse(Scanner& i, Output & o) { for (RuleList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) { - const RegExp *r = RegExp::rule_copy(*itOp, rank_counter.next()); - it->second.add (r); + // Different condition share rule regexps (regexps are immutable anyway), + // but must have an individual copy of rule info (rule reachability + // and shadow set are not the same for different conditions). + // Also must update rule rank to guarantee lowest priority. + const RegExp *re = *itOp; + const RuleInfo *info = re->pld.rule.info; + const RegExp *re_copy = RegExp::rule( + re->pld.rule.re, + re->pld.rule.ctx, + new RuleInfo(info->loc, rank_counter.next(), info->code, + &info->newcond, info->ctx_len, info->nullable)); + it->second.add(re_copy); } if (star_default) { -- 2.40.0