From 314eb5c705cf45b19b5637b7f2b6d58bc6909e28 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 6 Dec 2016 13:35:07 +0000 Subject: [PATCH] Untwined determinization and '-Wnondeterministic-tags' analysis. Now '-Wnondeterministic-tags' analysis is done right after determinization, while we still have all the kernels available. Instead of comparing transition tags (which are available in closure items, but not in kernels) we now compare tag versions: maximum number of distinct versions per tag is the desired degree of nondeterminism. Kernel items that belong to another rule do not count (all tags that don't belong to a particular rule should have initial versions in all items belonging to this rule). Added test with an example of pathological regexp: counted repetition cannot be folded efficiently in DFA, and combined with nondeterministic tags it results in arbitrary long chains of reordering commands. Length of command choin grows linearly with the number of repetitions (as DFA itself does). --- re2c/src/conf/warn.cc | 16 ++-- re2c/src/conf/warn.h | 2 +- re2c/src/ir/dfa/closure.cc | 30 +------ re2c/src/ir/dfa/closure.h | 3 +- re2c/src/ir/dfa/determinization.cc | 63 +++++++++++---- re2c/test/tags/copy_save.i--tags.c | 2 +- re2c/test/tags/counter1.i--tags.c | 118 ++++++++++++++++++++++++++++ re2c/test/tags/counter1.i--tags.re | 6 ++ re2c/test/tags/nondet_cat1.--tags.c | 2 +- re2c/test/tags/nondet_cat3.i.c | 4 +- re2c/test/tags/nondet_cat4.--tags.c | 2 +- re2c/test/tags/topsort1.i--tags.c | 2 +- re2c/test/tags/topsort2.i--tags.c | 4 +- re2c/test/tags/twopass.i--tags.c | 10 +-- re2c/test/tags/uniq.i--tags.c | 12 +-- 15 files changed, 203 insertions(+), 73 deletions(-) create mode 100644 re2c/test/tags/counter1.i--tags.c create mode 100644 re2c/test/tags/counter1.i--tags.re diff --git a/re2c/src/conf/warn.cc b/re2c/src/conf/warn.cc index cd7cf044..3904d8cb 100644 --- a/re2c/src/conf/warn.cc +++ b/re2c/src/conf/warn.cc @@ -121,20 +121,22 @@ void Warn::match_empty_string (uint32_t line, const std::string &cond) } void Warn::nondeterministic_tags(uint32_t line, const std::string &cond, - const std::string *tagname) + const std::string *tagname, size_t nver) { if (mask[NONDETERMINISTIC_TAGS] & WARNING) { bool e = mask[NONDETERMINISTIC_TAGS] & ERROR; error_accuml |= e; + + warning_start(line, e); if (tagname == NULL) { - warning(names[NONDETERMINISTIC_TAGS], line, e, - "trailing context %sis nondeterministic", - incond(cond).c_str()); + fprintf(stderr, "trailing context"); } else { - warning(names[NONDETERMINISTIC_TAGS], line, e, - "tag '%s' %sis nondeterministic", - tagname->c_str(), incond(cond).c_str()); + fprintf(stderr, "tag '%s'", tagname->c_str()); } + fprintf(stderr, + " %sis non-deterministic and induces %u parallel instances", + incond(cond).c_str(), static_cast(nver)); + warning_end(names[NONDETERMINISTIC_TAGS], e); } } diff --git a/re2c/src/conf/warn.h b/re2c/src/conf/warn.h index 419ddbfa..9c7eeeeb 100644 --- a/re2c/src/conf/warn.h +++ b/re2c/src/conf/warn.h @@ -59,7 +59,7 @@ public: void condition_order (uint32_t line); void empty_class (uint32_t line); void match_empty_string (uint32_t line, const std::string &cond); - void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname); + void nondeterministic_tags(uint32_t line, const std::string &cond, const std::string *tagname, size_t nver); void swapped_range (uint32_t line, uint32_t l, uint32_t u); void undefined_control_flow (const Skeleton &skel, std::vector & paths, bool overflow); void unreachable_rule (const std::string & cond, const Rule &rule); diff --git a/re2c/src/ir/dfa/closure.cc b/re2c/src/ir/dfa/closure.cc index 6450daa5..c296f4a4 100644 --- a/re2c/src/ir/dfa/closure.cc +++ b/re2c/src/ir/dfa/closure.cc @@ -11,12 +11,10 @@ bool is_better(const clos_t &c1, const clos_t &c2, Tagpool &tagpool); static bool compare_by_rule(const clos_t &c1, const clos_t &c2); static void prune_final_items(closure_t &clos, std::valarray &rules); static bool not_fin(const clos_t &c); -static void check_nondeterminism(const closure_t &clos, Tagpool &tagpool, const std::valarray &rules, bool *badtags); static tagsave_t *merge_transition_tags(closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, tagver_t &maxver); tagsave_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, - tcpool_t &tcpool, std::valarray &rules, bool *badtags, - tagver_t &maxver) + tcpool_t &tcpool, std::valarray &rules, tagver_t &maxver) { // build tagged epsilon-closure of the given set of NFA states clos2.clear(); @@ -32,9 +30,6 @@ tagsave_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, // sort closure, group items by rule std::sort(clos2.begin(), clos2.end(), compare_by_rule); - // find nondeterministic tags - check_nondeterminism(clos2, tagpool, rules, badtags); - // merge tags from different rules, find nondeterministic tags return merge_transition_tags(clos2, tagpool, tcpool, maxver); } @@ -205,29 +200,6 @@ bool not_fin(const clos_t &c) return c.state->type != nfa_state_t::FIN; } -// WARNING: this function assumes that closure items are grouped bu rule -void check_nondeterminism(const closure_t &clos, Tagpool &tagpool, - const std::valarray &rules, bool *badtags) -{ - size_t r = 0; - for (cclositer_t c = clos.begin(), e = clos.end(); c != e;) { - const tagver_t *x = tagpool[c->ttran]; - - // find next rule that occurs in closure - for (; r < c->state->rule; ++r); - const Rule &rule = rules[r]; - - // compare the 1st item with the rest of items: if some tag - // differs from that of the 1st item, then it is nondeterministic - for (++c; c != e && c->state->rule == r; ++c) { - const tagver_t *y = tagpool[c->ttran]; - for (size_t t = rule.lvar; t < rule.hvar; ++t) { - badtags[t] |= y[t] != x[t]; - } - } - } -} - tagsave_t *merge_transition_tags(closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, tagver_t &maxver) { diff --git a/re2c/src/ir/dfa/closure.h b/re2c/src/ir/dfa/closure.h index 636435d5..8da1bab6 100644 --- a/re2c/src/ir/dfa/closure.h +++ b/re2c/src/ir/dfa/closure.h @@ -23,8 +23,7 @@ typedef closure_t::iterator clositer_t; typedef closure_t::const_iterator cclositer_t; tagsave_t *closure(closure_t &clos1, closure_t &clos2, Tagpool &tagpool, - tcpool_t &tcpool, std::valarray &rules, bool *badtags, - tagver_t &maxver); + tcpool_t &tcpool, std::valarray &rules, tagver_t &maxver); } // namespace re2c diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 10c6270e..0efff2d0 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -17,7 +17,8 @@ namespace re2c static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol); static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol); -static void warn_bad_tags(const bool *badtags, const std::vector &tags, +static void warn_nondeterministic_tags(const kernels_t &kernels, + const Tagpool &tagpool, const std::vector &tags, const std::valarray &rules, const std::string &cond); const size_t dfa_t::NIL = std::numeric_limits::max(); @@ -64,7 +65,6 @@ dfa_t::dfa_t(const nfa_t &nfa, Tagpool tagpool(ntag); kernels_t kernels(tagpool, tcpool); closure_t clos1, clos2; - bool *badtags = new bool[ntag](); dump_dfa_t dump(*this, tagpool, nfa); // all-zero tag configuration must have static number zero @@ -80,7 +80,7 @@ dfa_t::dfa_t(const nfa_t &nfa, clos_t c0 = {NULL, nfa.root, INITIAL_TAGS, ZERO_TAGS, ZERO_TAGS}; clos1.push_back(c0); - closure(clos1, clos2, tagpool, tcpool, rules, badtags, maxtagver); + closure(clos1, clos2, tagpool, tcpool, rules, maxtagver); kernels.insert(clos2, NULL, maxtagver); dump.state0(clos2); @@ -111,27 +111,60 @@ dfa_t::dfa_t(const nfa_t &nfa, // find identical closure or add the new one for (size_t c = 0; c < nchars; ++c) { reach(kernel, clos1, charset[c]); - s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, badtags, maxtagver); + s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, maxtagver); s->arcs[c] = kernels.insert(clos2, &s->tcmd[c], maxtagver); dump.state(clos2, i, c); } } - warn_bad_tags(badtags, vartags, rules, cond); - delete[] badtags; + warn_nondeterministic_tags(kernels, tagpool, vartags, rules, cond); } -void warn_bad_tags(const bool *badtags, - const std::vector &tags, - const std::valarray &rules, - const std::string &cond) +/* + * For each tag, find maximal number of parallel versions of this tag + * used in each kernel (degree of non-determinism) and warn about tags with + * maximum degree two or more. + * + * WARNING: this function assumes that kernel items are grouped by rule + */ +void warn_nondeterministic_tags(const kernels_t &kernels, + const Tagpool &tagpool, const std::vector &tags, + const std::valarray &rules, const std::string &cond) { - const size_t ntags = tags.size(); - for (size_t i = 0; i < ntags; ++i) { - if (badtags[i]) { - const VarTag &tag = tags[i]; + const size_t + ntag = tagpool.ntags, + nkrn = kernels.size(); + std::vector maxv(ntag, 0); + std::set uniq; + + for (size_t i = 0; i < nkrn; ++i) { + const kernel_t *k = kernels[i]; + nfa_state_t **s = k->state; + const size_t n = k->size, *v = k->tvers; + + for (size_t u = 0; u < n;) { + const size_t r = s[u]->rule; + const Rule &rule = rules[r]; + + const size_t l = u; + for (; ++u < n && s[u]->rule == r;); + + for (size_t t = rule.lvar; t < rule.hvar; ++t) { + uniq.clear(); + for (size_t m = l; m < u; ++m) { + uniq.insert(tagpool[v[m]][t]); + } + maxv[t] = std::max(maxv[t], uniq.size()); + } + } + } + + for (size_t t = 0; t < ntag; ++t) { + const size_t m = maxv[t]; + if (m > 1) { + const VarTag &tag = tags[t]; const uint32_t line = rules[tag.rule].info->loc.line; - warn.nondeterministic_tags(line, cond, tag.name); + warn.nondeterministic_tags(line, cond, tag.name, m); } } } diff --git a/re2c/test/tags/copy_save.i--tags.c b/re2c/test/tags/copy_save.i--tags.c index dbb5b417..f064ada3 100644 --- a/re2c/test/tags/copy_save.i--tags.c +++ b/re2c/test/tags/copy_save.i--tags.c @@ -53,4 +53,4 @@ yy7: } re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] -re2c: warning: line 3: tag 'p' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 3: tag 'p' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/counter1.i--tags.c b/re2c/test/tags/counter1.i--tags.c new file mode 100644 index 00000000..9b373ac6 --- /dev/null +++ b/re2c/test/tags/counter1.i--tags.c @@ -0,0 +1,118 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 10) YYFILL(10); + yych = *YYCURSOR; + switch (yych) { + case 'd': + yyt1 = NULL; + yyt2 = YYCURSOR; + goto yy4; + default: goto yy2; + } +yy2: + ++YYCURSOR; +yy3: + {} +yy4: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'd': + yyt3 = YYCURSOR; + goto yy5; + default: goto yy3; + } +yy5: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt4 = YYCURSOR; + goto yy7; + default: goto yy6; + } +yy6: + YYCURSOR = YYMARKER; + goto yy3; +yy7: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt5 = YYCURSOR; + goto yy8; + default: goto yy6; + } +yy8: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt6 = YYCURSOR; + goto yy9; + default: goto yy6; + } +yy9: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt7 = YYCURSOR; + goto yy10; + default: goto yy6; + } +yy10: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt8 = YYCURSOR; + goto yy11; + default: goto yy6; + } +yy11: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt9 = YYCURSOR; + goto yy12; + default: goto yy6; + } +yy12: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt10 = YYCURSOR; + goto yy13; + default: goto yy6; + } +yy13: + yych = *++YYCURSOR; + switch (yych) { + case 'd': + yyt11 = YYCURSOR; + goto yy14; + default: goto yy6; + } +yy14: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'd': + yyt1 = yyt2; + yyt2 = yyt3; + yyt3 = yyt4; + yyt4 = yyt5; + yyt5 = yyt6; + yyt6 = yyt7; + yyt7 = yyt8; + yyt8 = yyt9; + yyt9 = yyt10; + yyt10 = yyt11; + yyt11 = YYCURSOR; + goto yy14; + default: goto yy16; + } +yy16: + z = yyt1; + {} +} + +re2c: warning: line 3: tag 'z' is non-deterministic and induces 11 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/counter1.i--tags.re b/re2c/test/tags/counter1.i--tags.re new file mode 100644 index 00000000..e46568ad --- /dev/null +++ b/re2c/test/tags/counter1.i--tags.re @@ -0,0 +1,6 @@ +/*!re2c + +(@z "d")* "d"{10} {} +* {} + +*/ diff --git a/re2c/test/tags/nondet_cat1.--tags.c b/re2c/test/tags/nondet_cat1.--tags.c index d82030a3..f32ecd9c 100644 --- a/re2c/test/tags/nondet_cat1.--tags.c +++ b/re2c/test/tags/nondet_cat1.--tags.c @@ -42,4 +42,4 @@ yy7: } #line 4 "tags/nondet_cat1.--tags.re" -re2c: warning: line 2: tag 'p' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 2: tag 'p' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/nondet_cat3.i.c b/re2c/test/tags/nondet_cat3.i.c index 475f0f06..355f5553 100644 --- a/re2c/test/tags/nondet_cat3.i.c +++ b/re2c/test/tags/nondet_cat3.i.c @@ -144,5 +144,5 @@ yy31: {} } -re2c: warning: line 21: trailing context is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 29: trailing context is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 21: trailing context is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 29: trailing context is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/nondet_cat4.--tags.c b/re2c/test/tags/nondet_cat4.--tags.c index 9701ea8a..8d175dad 100644 --- a/re2c/test/tags/nondet_cat4.--tags.c +++ b/re2c/test/tags/nondet_cat4.--tags.c @@ -48,4 +48,4 @@ yy7: } #line 6 "tags/nondet_cat4.--tags.re" -re2c: warning: line 4: tag 'p' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 4: tag 'p' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/topsort1.i--tags.c b/re2c/test/tags/topsort1.i--tags.c index 6caec5c4..68607f3a 100644 --- a/re2c/test/tags/topsort1.i--tags.c +++ b/re2c/test/tags/topsort1.i--tags.c @@ -60,4 +60,4 @@ yy9: } re2c: warning: line 4: rule matches empty string [-Wmatch-empty-string] -re2c: warning: line 3: tag 'p' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 3: tag 'p' is non-deterministic and induces 5 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/topsort2.i--tags.c b/re2c/test/tags/topsort2.i--tags.c index 401146eb..92fcd038 100644 --- a/re2c/test/tags/topsort2.i--tags.c +++ b/re2c/test/tags/topsort2.i--tags.c @@ -64,5 +64,5 @@ yy9: } re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] -re2c: warning: line 3: tag 'p' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 3: tag 'q' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 3: tag 'p' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 3: tag 'q' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/twopass.i--tags.c b/re2c/test/tags/twopass.i--tags.c index 0a459408..d5e0e1fd 100644 --- a/re2c/test/tags/twopass.i--tags.c +++ b/re2c/test/tags/twopass.i--tags.c @@ -207,8 +207,8 @@ yy21: } re2c: warning: line 7: rule matches empty string [-Wmatch-empty-string] -re2c: warning: line 6: tag 'y' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 6: tag 'q' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 6: tag 'p' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 7: tag 's' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 7: tag 'r' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 6: tag 'y' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 6: tag 'q' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 6: tag 'p' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 7: tag 's' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 7: tag 'r' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/uniq.i--tags.c b/re2c/test/tags/uniq.i--tags.c index 8d99e3d3..6a8be080 100644 --- a/re2c/test/tags/uniq.i--tags.c +++ b/re2c/test/tags/uniq.i--tags.c @@ -48,9 +48,9 @@ yy6: } re2c: warning: line 6: rule matches empty string [-Wmatch-empty-string] -re2c: warning: line 5: tag 'z' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 5: tag 'y' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 5: tag 'x' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 5: tag 'w' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 5: tag 'v' is nondeterministic [-Wnondeterministic-tags] -re2c: warning: line 5: tag 'u' is nondeterministic [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'z' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'y' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'x' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'w' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'v' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 5: tag 'u' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] -- 2.40.0