From 5396689125a350f05cfdd30a2b7692eb26b0ccf4 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 22 Feb 2016 13:51:57 +0000 Subject: [PATCH] Mark rules as nullable rather than keep a separate collection of nullable rules. --- re2c/Makefile.am | 1 - re2c/src/ir/compile.cc | 1 - re2c/src/ir/regexp/nullable.cc | 45 -------------------- re2c/src/ir/regexp/regexp.cc | 28 ++++++++++-- re2c/src/ir/regexp/regexp.h | 2 - re2c/src/ir/skeleton/skeleton.cc | 1 - re2c/src/ir/skeleton/skeleton.h | 1 - re2c/src/ir/skeleton/unreachable_nullable.cc | 9 ++-- re2c/src/parse/rules.h | 3 ++ 9 files changed, 32 insertions(+), 59 deletions(-) delete mode 100644 re2c/src/ir/regexp/nullable.cc diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 563983b8..311c885e 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -93,7 +93,6 @@ SRC = \ src/ir/regexp/encoding/utf16/utf16_regexp.cc \ src/ir/regexp/encoding/utf16/utf16.cc \ src/ir/regexp/encoding/utf16/utf16_range.cc \ - src/ir/regexp/nullable.cc \ src/ir/regexp/regexp.cc \ src/ir/regexp/split_charset.cc \ src/ir/compile.cc \ diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index 6826cd8c..3516df25 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -54,7 +54,6 @@ smart_ptr compile (Spec & spec, Output & output, const std::string & cond, // skeleton must be constructed after DFA construction // but prior to any other DFA transformations Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line); - nullable_rules(spec.re, skeleton->nullable_rules); minimization(dfa); diff --git a/re2c/src/ir/regexp/nullable.cc b/re2c/src/ir/regexp/nullable.cc deleted file mode 100644 index e58096fe..00000000 --- a/re2c/src/ir/regexp/nullable.cc +++ /dev/null @@ -1,45 +0,0 @@ -#include "src/ir/regexp/regexp.h" - -namespace re2c -{ - -static bool nullable(const RegExp *re) -{ - switch (re->tag) { - default: - case RegExp::NIL: - return true; - case RegExp::SYM: - return false; - case RegExp::ALT: - return nullable(re->pld.alt.re1) - || nullable(re->pld.alt.re2); - case RegExp::CAT: - return nullable(re->pld.cat.re1) - && nullable(re->pld.cat.re2); - case RegExp::ITER: - return true; - case RegExp::RULE: - return nullable(re->pld.rule.re); - } -} - -void nullable_rules(const RegExp *re, std::vector &rs) -{ - switch (re->tag) { - case RegExp::ALT: - nullable_rules(re->pld.alt.re1, rs); - nullable_rules(re->pld.alt.re2, rs); - break; - case RegExp::RULE: - if (nullable(re->pld.rule.re)) { - rs.push_back(re->pld.rule.info); - } - break; - default: - break; - } -} - -} // end namespace re2c - diff --git a/re2c/src/ir/regexp/regexp.cc b/re2c/src/ir/regexp/regexp.cc index b260ff39..d3a44154 100644 --- a/re2c/src/ir/regexp/regexp.cc +++ b/re2c/src/ir/regexp/regexp.cc @@ -16,6 +16,7 @@ namespace re2c { static uint32_t fixlen(const RegExp *re); +static bool nullable(const RegExp *re); free_list RegExp::flist; @@ -225,7 +226,7 @@ const RegExp* RegExp::rule(const Loc &loc, const RegExp *r1, const RegExp *r2, ctx_len = ~0u; } - re->pld.rule.info = new RuleInfo(loc, rank, code, newcond, ctx_len); + re->pld.rule.info = new RuleInfo(loc, rank, code, newcond, ctx_len, nullable(r1)); return re; } @@ -237,8 +238,8 @@ const RegExp* RegExp::rule_copy(const RegExp *rule, rule_rank_t rank) re->pld.rule.re = rule->pld.rule.re; re->pld.rule.ctx = rule->pld.rule.ctx; const RuleInfo *info = rule->pld.rule.info; - re->pld.rule.info = new RuleInfo(info->loc, rank, - info->code, &info->newcond, info->ctx_len); + re->pld.rule.info = new RuleInfo(info->loc, rank, info->code, + &info->newcond, info->ctx_len, info->nullable); return re; } @@ -274,4 +275,25 @@ uint32_t fixlen(const RegExp *re) } } +bool nullable(const RegExp *re) +{ + switch (re->tag) { + default: + case RegExp::NIL: + return true; + case RegExp::SYM: + return false; + case RegExp::ALT: + return nullable(re->pld.alt.re1) + || nullable(re->pld.alt.re2); + case RegExp::CAT: + return nullable(re->pld.cat.re1) + && nullable(re->pld.cat.re2); + case RegExp::ITER: + return true; + case RegExp::RULE: + return nullable(re->pld.rule.re); + } +} + } // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h index 0cdef9e3..9e1a3fee 100644 --- a/re2c/src/ir/regexp/regexp.h +++ b/re2c/src/ir/regexp/regexp.h @@ -110,8 +110,6 @@ private: }; void split(const RegExp *re, std::set &cs); -void nullable_rules(const RegExp *re, std::vector &rs); - const RegExp *mkAlt(const RegExp *re1, const RegExp *re2); const RegExp *doAlt(const RegExp *re1, const RegExp *re2); const RegExp *doCat(const RegExp *re1, const RegExp *re2); diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index a678246e..cbc3c7f0 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -82,7 +82,6 @@ Skeleton::Skeleton , nodes (new Node [nodes_count + 1]) // +1 for default state , sizeof_key (4) , rules (rs) - , nullable_rules () { const size_t nc = cs.size() - 1; diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index bbda6b23..f89135dc 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -114,7 +114,6 @@ struct Skeleton Node * nodes; size_t sizeof_key; rules_t rules; - std::vector nullable_rules; Skeleton ( const dfa_t &dfa diff --git a/re2c/src/ir/skeleton/unreachable_nullable.cc b/re2c/src/ir/skeleton/unreachable_nullable.cc index a46c51b6..7dbfd3dd 100644 --- a/re2c/src/ir/skeleton/unreachable_nullable.cc +++ b/re2c/src/ir/skeleton/unreachable_nullable.cc @@ -78,13 +78,12 @@ void Skeleton::warn_unreachable_nullable_rules () // - rules that match empty strins with nonempty trailing context // false positives on partially shadowed (yet reachable) rules, e.g.: // [^]? - for (std::vector::const_iterator i = nullable_rules.begin(); - i != nullable_rules.end(); ++i) + for (rules_t::const_iterator i = rules.begin(); i != rules.end(); ++i) { - const RuleInfo *ri = *i; - if (ri->reachable) + const RuleInfo *r = *i; + if (r->nullable && r->reachable) { - warn.match_empty_string(ri->loc.line); + warn.match_empty_string(r->loc.line); } } } diff --git a/re2c/src/parse/rules.h b/re2c/src/parse/rules.h index 952e7321..ce6f69f1 100644 --- a/re2c/src/parse/rules.h +++ b/re2c/src/parse/rules.h @@ -29,6 +29,7 @@ struct RuleInfo std::set shadow; bool reachable; + bool nullable; RuleInfo ( const Loc & l @@ -36,6 +37,7 @@ struct RuleInfo , const Code * c , const std::string * cond , uint32_t cl + , bool nl ) : loc (l) , ctx_len (cl) @@ -44,6 +46,7 @@ struct RuleInfo , newcond (cond ? *cond : "") , shadow () , reachable (false) + , nullable (nl) {} FORBID_COPY(RuleInfo); -- 2.40.0