From: Ulya Trofimovich Date: Thu, 19 May 2016 10:37:11 +0000 (+0100) Subject: Don't bother with reachability when reporting nullable rules. X-Git-Tag: 1.0~39^2~290 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b47973942d6c89f22ff5a4ffa82ad93f411eada1;p=re2c Don't bother with reachability when reporting nullable rules. We couldn't trace all unreachable nullable rules anyway, e.g.: [^]? Nullable part of this rule is unreachable, but it was reported. Besides, there's nothing bad in reporting all problems at once. --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 3c1597a0..408e3bfa 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -86,7 +86,6 @@ SRC = \ src/ir/nfa/init_rules.cc \ src/ir/nfa/make_tags.cc \ src/ir/nfa/nfa.cc \ - src/ir/nfa/nullable.cc \ src/ir/nfa/regexps2nfa.cc \ src/ir/adfa/adfa.cc \ src/ir/adfa/prepare.cc \ @@ -103,6 +102,7 @@ SRC = \ src/ir/regexp/encoding/utf16/utf16_regexp.cc \ src/ir/regexp/encoding/utf16/utf16.cc \ src/ir/regexp/encoding/utf16/utf16_range.cc \ + src/ir/regexp/nullable.cc \ src/ir/regexp/regexp.cc \ src/ir/regexp/split_charset.cc \ src/ir/compile.cc \ @@ -112,7 +112,7 @@ SRC = \ src/ir/skeleton/generate_data.cc \ src/ir/skeleton/maxpath.cc \ src/ir/skeleton/skeleton.cc \ - src/ir/skeleton/unreachable_nullable.cc \ + src/ir/skeleton/unreachable.cc \ src/ir/tag.cc \ src/ir/tagpool.cc \ src/main.cc \ diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 84ef2af2..0800f6c7 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -175,7 +175,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra head->action.set_initial (initial_label, head->action.type == Action::SAVE); warn_undefined_control_flow(*skeleton); - warn_unreachable_nullable_rules(*skeleton); + warn_unreachable(*skeleton); if (opts->target == opt_t::SKELETON) { if (output.skeletons.insert (name).second) diff --git a/re2c/src/conf/warn.cc b/re2c/src/conf/warn.cc 
index 5810f129..8c75420b 100644 --- a/re2c/src/conf/warn.cc +++ b/re2c/src/conf/warn.cc @@ -109,13 +109,14 @@ void Warn::empty_class (uint32_t line) } } -void Warn::match_empty_string (uint32_t line) +void Warn::match_empty_string (uint32_t line, const std::string &cond) { if (mask[MATCH_EMPTY_STRING] & WARNING) { const bool e = mask[MATCH_EMPTY_STRING] & ERROR; error_accuml |= e; - warning (names[MATCH_EMPTY_STRING], line, e, "rule matches empty string"); + warning (names[MATCH_EMPTY_STRING], line, e, + "rule %smatches empty string", incond(cond).c_str()); } } diff --git a/re2c/src/conf/warn.h b/re2c/src/conf/warn.h index 58501c2f..419ddbfa 100644 --- a/re2c/src/conf/warn.h +++ b/re2c/src/conf/warn.h @@ -58,7 +58,7 @@ public: void condition_order (uint32_t line); void empty_class (uint32_t line); - void match_empty_string (uint32_t line); + void match_empty_string (uint32_t line, const std::string &cond); void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname); void swapped_range (uint32_t line, uint32_t l, uint32_t u); void undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow); diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index 73369ad7..c380f889 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -38,6 +38,8 @@ static smart_ptr compile_rules( const uint32_t line = output.source.block().line; const std::string name = make_name(cond, line); + warn_nullable(rules, cond); + // The original set of code units (charset) might be very large. // A common trick it is to split charset into disjoint character ranges // and choose a representative of each range (we choose lower bound). 
diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc index dc3dc512..7e43a2f4 100644 --- a/re2c/src/ir/nfa/init_rules.cc +++ b/re2c/src/ir/nfa/init_rules.cc @@ -44,7 +44,6 @@ void init_rules(const std::vector &regexps, for (size_t r = 0, t = 0; r < nr; ++r) { Rule &rule = rules[r]; rule.info = regexps[r]->info; - rule.nullable = nullable_rule(regexps[r]); rule.ltag = t; for (; t < nt && tags[t].rule == r; ++t); diff --git a/re2c/src/ir/nfa/nullable.cc b/re2c/src/ir/nfa/nullable.cc deleted file mode 100644 index a2fbf822..00000000 --- a/re2c/src/ir/nfa/nullable.cc +++ /dev/null @@ -1,38 +0,0 @@ -#include "src/ir/nfa/nfa.h" - -namespace re2c { - -static bool nullable(const RegExp *re, bool &trail) -{ - if (trail) { - return true; - } - switch (re->type) { - case RegExp::NIL: - case RegExp::ITER: - return true; - case RegExp::TAG: - if (re->tag == NULL) { - trail = true; - } - return true; - case RegExp::SYM: - return false; - case RegExp::ALT: - return nullable(re->alt.re1, trail) - || nullable(re->alt.re2, trail); - case RegExp::CAT: - return nullable(re->cat.re1, trail) - && nullable(re->cat.re2, trail); - default: - assert(false); - } -} - -bool nullable_rule(const RegExpRule *rule) -{ - bool trail = false; - return nullable(rule->re, trail); -} - -} // namespace re2c diff --git a/re2c/src/ir/regexp/nullable.cc b/re2c/src/ir/regexp/nullable.cc new file mode 100644 index 00000000..3abef054 --- /dev/null +++ b/re2c/src/ir/regexp/nullable.cc @@ -0,0 +1,52 @@ +#include "src/conf/opt.h" +#include "src/ir/regexp/regexp.h" +#include "src/globals.h" + +namespace re2c { + +static bool nullable(const RegExp *re, bool &trail) +{ + if (trail) { + return true; + } + switch (re->type) { + case RegExp::NIL: + case RegExp::ITER: + return true; + case RegExp::TAG: + if (re->tag == NULL) { + trail = true; + } + return true; + case RegExp::SYM: + return false; + case RegExp::ALT: + return nullable(re->alt.re1, trail) + || nullable(re->alt.re2, trail); + case 
RegExp::CAT: + return nullable(re->cat.re1, trail) + && nullable(re->cat.re2, trail); + default: + assert(false); + } +} + +/* + * warn about rules that match empty string + * (including rules with nonempty trailing context) + * false positives on partially self-shadowed rules like [^]? + */ +void warn_nullable(const std::vector &regexps, + const std::string &cond) +{ + const size_t nregexps = regexps.size(); + for (size_t i = 0; i < nregexps; ++i) { + const RegExpRule *r = regexps[i]; + bool trail = false; + if (nullable(r->re, trail)) { + warn.match_empty_string(r->info->loc.line, cond); + } + } +} + +} // namespace re2c diff --git a/re2c/src/ir/regexp/regexp.h b/re2c/src/ir/regexp/regexp.h index 1244a4d9..2d216753 100644 --- a/re2c/src/ir/regexp/regexp.h +++ b/re2c/src/ir/regexp/regexp.h @@ -118,6 +118,8 @@ const RegExp *doCat(const RegExp *re1, const RegExp *re2); const RegExp *repeat(const RegExp *re, uint32_t n); const RegExp *repeat_from_to(const RegExp *re, uint32_t n, uint32_t m); const RegExp *repeat_from(const RegExp *re, uint32_t n); +void warn_nullable(const std::vector &regexps, + const std::string &cond); } // end namespace re2c diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index 0cfc6bdb..1824c8d6 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -39,7 +39,6 @@ struct Rule size_t htag; size_t trail; size_t tags; - bool nullable; std::set shadow; bool reachable; @@ -49,7 +48,6 @@ struct Rule , htag(0) , trail(Tag::NONE) , tags(0) - , nullable(false) , shadow() , reachable(false) {} diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index 7e54d372..255f0974 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -85,7 +85,7 @@ template key_t Skeleton::rule2key(size_t r, size_t def) const uint32_t maxpath(const Skeleton &skel); void warn_undefined_control_flow(const Skeleton &skel); void fprint_default_path(FILE *f, const Skeleton &skel, const path_t &p); -void 
warn_unreachable_nullable_rules(const Skeleton &skel); +void warn_unreachable(const Skeleton &skel); void emit_data(const Skeleton &skel); void emit_prolog(OutputFile & o); void emit_start(const Skeleton &skel, OutputFile &o, size_t maxfill, diff --git a/re2c/src/ir/skeleton/unreachable_nullable.cc b/re2c/src/ir/skeleton/unreachable.cc similarity index 82% rename from re2c/src/ir/skeleton/unreachable_nullable.cc rename to re2c/src/ir/skeleton/unreachable.cc index 5bbb4951..3c0dc8e5 100644 --- a/re2c/src/ir/skeleton/unreachable_nullable.cc +++ b/re2c/src/ir/skeleton/unreachable.cc @@ -36,7 +36,7 @@ static void calc_reachable( } } -void warn_unreachable_nullable_rules(const Skeleton &skel) +void warn_unreachable(const Skeleton &skel) { // calculate reachable rules const size_t nnodes = skel.nodes_count; @@ -76,18 +76,6 @@ void warn_unreachable_nullable_rules(const Skeleton &skel) warn.unreachable_rule(skel.cond, rule); } } - - // warn about nullable rules: - // - rules that match empty string - // - rules that match empty strins with nonempty trailing context - // false positives on partially shadowed (yet reachable) rules, e.g.: - // [^]? 
- for (size_t i = 0; i < nrules; ++i) { - const Rule &rule = rules[i]; - if (rule.nullable && rule.reachable) { - warn.match_empty_string(rule.info->loc.line); - } - } } } // namespace re2c diff --git a/re2c/test/bug116.c b/re2c/test/bug116.c index f89a356a..45402941 100644 --- a/re2c/test/bug116.c +++ b/re2c/test/bug116.c @@ -20,5 +20,5 @@ yy3: } #line 3 "bug116.re" -re2c: warning: line 3: control flow is undefined for strings that match '[\x0-\x60\x62-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: control flow is undefined for strings that match '[\x0-\x60\x62-\xFF]', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/bug2462777.ci.c b/re2c/test/bug2462777.ci.c index 77c241d5..345fcb73 100644 --- a/re2c/test/bug2462777.ci.c +++ b/re2c/test/bug2462777.ci.c @@ -21,4 +21,4 @@ yy4: {x;} } -re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 2: rule in condition 'X' matches empty string [-Wmatch-empty-string] diff --git a/re2c/test/condition_02.c.c b/re2c/test/condition_02.c.c index 36d36a04..0fb4afc9 100644 --- a/re2c/test/condition_02.c.c +++ b/re2c/test/condition_02.c.c @@ -51,6 +51,6 @@ yy14: } #line 7 "condition_02.c.re" -re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: rule in condition '0' matches empty string [-Wmatch-empty-string] re2c: warning: line 7: control flow in condition 'r1' is undefined for strings that match '[\x0-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 7: control flow in condition 'r2' is undefined for strings that match '[\x0-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/condition_02.cg.c b/re2c/test/condition_02.cg.c index cf693a6b..7a18c466 100644 --- a/re2c/test/condition_02.cg.c +++ b/re2c/test/condition_02.cg.c @@ -46,7 +46,7 @@ yy14: } #line 7 
"condition_02.cg.re" -re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: rule in condition '0' matches empty string [-Wmatch-empty-string] re2c: warning: line 7: control flow in condition 'r1' is undefined for strings that match '[\x0-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 7: control flow in condition 'r2' is undefined for strings that match '[\x0-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 7: looks like you use hardcoded numbers instead of autogenerated condition names: better add '/*!types:re2c*/' directive or '-t, --type-header' option and don't rely on fixed condition order. [-Wcondition-order] diff --git a/re2c/test/condition_03.cg.c b/re2c/test/condition_03.cg.c index aad5c09b..7f5c76e1 100644 --- a/re2c/test/condition_03.cg.c +++ b/re2c/test/condition_03.cg.c @@ -79,7 +79,7 @@ yy22: } #line 9 "condition_03.cg.re" -re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: rule in condition '0' matches empty string [-Wmatch-empty-string] re2c: warning: line 9: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: looks like you use hardcoded numbers instead of autogenerated condition names: better add '/*!types:re2c*/' directive or '-t, --type-header' option and don't rely on fixed condition order. 
[-Wcondition-order] diff --git a/re2c/test/condition_11.cg.c b/re2c/test/condition_11.cg.c index 7bbf213b..7941d6d3 100644 --- a/re2c/test/condition_11.cg.c +++ b/re2c/test/condition_11.cg.c @@ -84,7 +84,7 @@ yy22: } #line 9 "condition_11.cg.re" -re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: rule in condition '0' matches empty string [-Wmatch-empty-string] re2c: warning: line 9: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: looks like you use hardcoded numbers instead of autogenerated condition names: better add '/*!types:re2c*/' directive or '-t, --type-header' option and don't rely on fixed condition order. [-Wcondition-order] diff --git a/re2c/test/condition_13.cg.c b/re2c/test/condition_13.cg.c index 998e9084..084ff3a7 100644 --- a/re2c/test/condition_13.cg.c +++ b/re2c/test/condition_13.cg.c @@ -68,7 +68,7 @@ yy22: } #line 9 "condition_13.cg.re" -re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: rule in condition '0' matches empty string [-Wmatch-empty-string] re2c: warning: line 9: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 9: looks like you use hardcoded numbers instead of autogenerated condition names: better add '/*!types:re2c*/' directive or '-t, --type-header' option and don't rely on fixed condition order. 
[-Wcondition-order] diff --git a/re2c/test/default_dup_star_1.ic.c b/re2c/test/default_dup_star_1.ic.c index 940756bf..d62ce064 100644 --- a/re2c/test/default_dup_star_1.ic.c +++ b/re2c/test/default_dup_star_1.ic.c @@ -28,4 +28,5 @@ yy12: { return DEFAULT-*; } } +re2c: warning: line 4: rule in condition 'c3' matches empty string [-Wmatch-empty-string] re2c: warning: line 4: unreachable rule in condition 'c3' (shadowed by rule at line 5) [-Wunreachable-rules] diff --git a/re2c/test/input4.--empty-class(match-empty).c b/re2c/test/input4.--empty-class(match-empty).c index 1ef8d807..6635a71a 100644 --- a/re2c/test/input4.--empty-class(match-empty).c +++ b/re2c/test/input4.--empty-class(match-empty).c @@ -19,5 +19,6 @@ yy3: #line 7 "input4.--empty-class(match-empty).re" re2c: warning: line 3: empty character class [-Wempty-character-class] +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] re2c: warning: line 5: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] diff --git a/re2c/test/input4.c b/re2c/test/input4.c index 517b8282..302c2246 100644 --- a/re2c/test/input4.c +++ b/re2c/test/input4.c @@ -19,5 +19,6 @@ yy3: #line 7 "input4.re" re2c: warning: line 3: empty character class [-Wempty-character-class] +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] re2c: warning: line 5: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] diff --git a/re2c/test/input6.--empty-class(match-empty).c b/re2c/test/input6.--empty-class(match-empty).c index 238c64a8..5652549d 100644 --- a/re2c/test/input6.--empty-class(match-empty).c +++ b/re2c/test/input6.--empty-class(match-empty).c @@ -21,4 +21,5 @@ yy3: re2c: warning: line 3: empty character class [-Wempty-character-class] re2c: warning: line 3: empty character class 
[-Wempty-character-class] +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] diff --git a/re2c/test/input6.c b/re2c/test/input6.c index 410ad581..f0ff023a 100644 --- a/re2c/test/input6.c +++ b/re2c/test/input6.c @@ -21,4 +21,5 @@ yy3: re2c: warning: line 3: empty character class [-Wempty-character-class] re2c: warning: line 3: empty character class [-Wempty-character-class] +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule (shadowed by rule at line 4) [-Wunreachable-rules] diff --git a/re2c/test/segfault_full_range_star.bi.c b/re2c/test/segfault_full_range_star.bi.c index 0f9f7d7b..d05aaf19 100644 --- a/re2c/test/segfault_full_range_star.bi.c +++ b/re2c/test/segfault_full_range_star.bi.c @@ -48,4 +48,5 @@ yy0: {} } +re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 2: unreachable rule [-Wunreachable-rules] diff --git a/re2c/test/segfault_full_range_star_startlabel.ci.c b/re2c/test/segfault_full_range_star_startlabel.ci.c index f99aeeed..5a0846d9 100644 --- a/re2c/test/segfault_full_range_star_startlabel.ci.c +++ b/re2c/test/segfault_full_range_star_startlabel.ci.c @@ -18,4 +18,5 @@ yy1: {} } +re2c: warning: line 3: rule in condition 'c1' matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule in condition 'c1' [-Wunreachable-rules] diff --git a/re2c/test/segfault_full_range_star_startlabel.i.c b/re2c/test/segfault_full_range_star_startlabel.i.c index a182f7d8..29e7c378 100644 --- a/re2c/test/segfault_full_range_star_startlabel.i.c +++ b/re2c/test/segfault_full_range_star_startlabel.i.c @@ -13,4 +13,5 @@ yy0: {} } +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule [-Wunreachable-rules] diff --git a/re2c/test/segfault_full_range_star_startlabel2.ci.c 
b/re2c/test/segfault_full_range_star_startlabel2.ci.c index bc528844..b624a5f1 100644 --- a/re2c/test/segfault_full_range_star_startlabel2.ci.c +++ b/re2c/test/segfault_full_range_star_startlabel2.ci.c @@ -18,4 +18,5 @@ yy1: {} } +re2c: warning: line 3: rule in condition 'c1' matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule in condition 'c1' [-Wunreachable-rules] diff --git a/re2c/test/segfault_full_range_star_startlabel2.i.c b/re2c/test/segfault_full_range_star_startlabel2.i.c index 214ac741..0e60f889 100644 --- a/re2c/test/segfault_full_range_star_startlabel2.i.c +++ b/re2c/test/segfault_full_range_star_startlabel2.i.c @@ -12,4 +12,5 @@ yy0: {} } +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: unreachable rule [-Wunreachable-rules] diff --git a/re2c/test/tags/nondet_iter.--tags.c b/re2c/test/tags/nondet_iter.--tags.c index 661c10de..350eb6bb 100644 --- a/re2c/test/tags/nondet_iter.--tags.c +++ b/re2c/test/tags/nondet_iter.--tags.c @@ -1,10 +1,10 @@ -re2c: error: line 5: tag 'b' is nondeterministic -re2c: error: line 6: tag 'e' is nondeterministic -re2c: error: line 8: tag 'c' is nondeterministic -re2c: error: line 9: tag 'f' is nondeterministic re2c: warning: line 2: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 5: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 6: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 8: rule matches empty string [-Wmatch-empty-string] re2c: warning: line 9: rule matches empty string [-Wmatch-empty-string] +re2c: error: line 5: tag 'b' is nondeterministic +re2c: error: line 6: tag 'e' is nondeterministic +re2c: error: line 8: tag 'c' is nondeterministic +re2c: error: line 9: tag 'f' is nondeterministic diff --git a/re2c/test/yyaccept_missing.bci.c b/re2c/test/yyaccept_missing.bci.c index fe4aa299..71e7ba45 100644 --- 
a/re2c/test/yyaccept_missing.bci.c +++ b/re2c/test/yyaccept_missing.bci.c @@ -165,4 +165,4 @@ int main(int argc, char **argv) return 1; } } -re2c: warning: line 44: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 44: rule in condition 'ST_VALUE' matches empty string [-Wmatch-empty-string]