From e11a1503f574974a412c5cb46b532d45d17a20e9 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 29 Sep 2016 18:33:51 +0100 Subject: [PATCH] Liveness of fallback tags is a forward-propagagation problem. re2c used to merge all possible fallback tags and backward-propagate them from states that have default transitions. This is an overapproximation; some tags are marked alive while they are not. Liveness of fallback tags should be forward-propagated, because this is the only way to track paths that really need these tags (that is, paths that start in final state and end with default transition *without passing through another final state*). Note that sometimes we can get to the same default transition following two different paths, one path may passing through final state; the other is not. One path cares for fallback tags; the other doesn't. We have no way to find out if we start bacward prpagation of fallback tags from default transition. --- re2c/src/ir/dfa/dead_rules.cc | 3 + re2c/src/ir/dfa/dfa.h | 2 + re2c/src/ir/dfa/tag_deduplication.cc | 116 +++++++++++---------------- re2c/test/tags/fallback1.i--tags.c | 75 +++++++++++++++++ re2c/test/tags/fallback1.i--tags.re | 19 +++++ 5 files changed, 146 insertions(+), 69 deletions(-) create mode 100644 re2c/test/tags/fallback1.i--tags.c create mode 100644 re2c/test/tags/fallback1.i--tags.re diff --git a/re2c/src/ir/dfa/dead_rules.cc b/re2c/src/ir/dfa/dead_rules.cc index 24cefda7..6a908257 100644 --- a/re2c/src/ir/dfa/dead_rules.cc +++ b/re2c/src/ir/dfa/dead_rules.cc @@ -191,6 +191,9 @@ static void find_fallback_states(dfa_t &dfa, const bool *live) const bool *fallthru = &live[nrules * nstates]; for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; + if (fallthru[i]) { + s->fallthru = true; + } if (s->rule != Rule::NONE) { for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 839df748..f175282d 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -22,6 +22,7 @@ struct dfa_state_t size_t *tags; size_t rule; size_t rule_tags; + bool fallthru; bool fallback; explicit dfa_state_t(size_t nchars) @@ -29,6 +30,7 @@ struct dfa_state_t , tags(new size_t[nchars]) , rule(Rule::NONE) , rule_tags(ZERO_TAGS) + , fallthru(false) , fallback(false) {} ~dfa_state_t() diff --git a/re2c/src/ir/dfa/tag_deduplication.cc b/re2c/src/ir/dfa/tag_deduplication.cc index dbba1954..3f83a784 100644 --- a/re2c/src/ir/dfa/tag_deduplication.cc +++ b/re2c/src/ir/dfa/tag_deduplication.cc @@ -28,34 +28,37 @@ static void backprop(revarc_t **rdfa, } } +static void forwprop(const dfa_t &dfa, bool *been, size_t state, + size_t *live, size_t need) +{ + if (been[state]) return; + been[state] = true; + + live[state] = dfa.tagpool.orl(live[state], need); + + const dfa_state_t *s = dfa.states[state]; + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t dest = s->arcs[c]; + if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) { + forwprop(dfa, been, dest, live, need); + } + } +} + /* note [liveness analyses on tags] * - * Tag T is alive in state S if there is a transition - * from S to some state S' that does not update T - * and either: - * - * (1) S' is the default state - * and S is final - * and T belongs to S's final tag set + * Liveness of a rule tag is propagated backwards from final + * state and until we meet transition that updates this tag. * - * (2) S' is the default state - * and S is not final - * and T belongs to fallback tag set - * - * (3) S' is not the default state - * and T is alive in S' - * - * The algorithm inspects all DFA states one by one. - * If a state satisfies (1) or (2) for some set of tags, - * the algorithm propagates all these tags backwards to - * fulfill (3). Propagation stops if either all tags are masked - * or live set in not changed after adding new tags. + * Liveness of fallback tag is propagated forward from fallback + * state (see note [fallback states]) and until there remain + * any fallthrough paths from current state. */ -static void calc_live(const dfa_t &dfa, size_t fallback, size_t *live) +static void calc_live(const dfa_t &dfa, size_t *live) { const size_t nstates = dfa.states.size(); - bool *fallthru = new bool[nstates](); + // backward-propagate rule tags revarc_t **rdfa = new revarc_t*[nstates](); revarc_t *rarcs = new revarc_t[nstates * dfa.nchars]; revarc_t *a = rarcs; @@ -68,31 +71,32 @@ static void calc_live(const dfa_t &dfa, size_t fallback, size_t *live) a->tags = s->tags[c]; a->next = rdfa[j]; rdfa[j] = a++; - } else { - fallthru[i] = true; } } } - for (size_t i = 0; i < nstates; ++i) { const dfa_state_t *s = dfa.states[i]; - if (fallthru[i]) { - const size_t tags = s->rule != Rule::NONE - // final state, only rule tags are alive - ? dfa.tagpool.andlinv(dfa.rules[s->rule].tags, - s->rule_tags) - // transition to default state and dispatch on - // 'yyaccept': all fallback rules are potentially - // reachable, their tags are alive - // no mask: no rule implies no rule tags - : fallback; - backprop(rdfa, dfa.tagpool, live, tags, i); + if (s->rule != Rule::NONE) { + const size_t need = dfa.tagpool.andlinv( + dfa.rules[s->rule].tags, s->rule_tags); + backprop(rdfa, dfa.tagpool, live, need, i); } } - delete[] rdfa; delete[] rarcs; - delete[] fallthru; + + // forward-propagate fallback tags + bool *been = new bool[nstates]; + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + if (s->fallback) { + const size_t need = dfa.tagpool.andlinv( + dfa.rules[s->rule].tags, s->rule_tags); + std::fill(been, been + nstates, false); + forwprop(dfa, been, i, live, need); + } + } + delete[] been; } static void mask_dead(dfa_t &dfa, const size_t *live) @@ -134,31 +138,22 @@ static void incompatible(bool *tbl, } static void incompatibility_table(const dfa_t &dfa, - const size_t *livetags, size_t deftags, - bool *incompattbl) + const size_t *livetags, bool *incompattbl) { const size_t nstates = dfa.states.size(); const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < nstates; ++i) { const dfa_state_t *s = dfa.states[i]; - bool fallthru = false; for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { incompatible(incompattbl, dfa.tagpool, livetags[j], s->tags[c]); - } else { - fallthru = true; } } - if (fallthru) { - if (s->rule != Rule::NONE) { - incompatible(incompattbl, dfa.tagpool, - dfa.rules[s->rule].tags, s->rule_tags); - } else { - incompatible(incompattbl, dfa.tagpool, - deftags, s->rule_tags); - } + if (s->rule != Rule::NONE) { + incompatible(incompattbl, dfa.tagpool, + dfa.rules[s->rule].tags, s->rule_tags); } } @@ -239,21 +234,6 @@ static void patch_tags(dfa_t &dfa, const size_t *represent) } } -// see note [fallback states] -// fallback tags are all tags that belong to fallback rules -static size_t fallback_tags(const dfa_t &dfa) -{ - const size_t nstates = dfa.states.size(); - size_t tags = ZERO_TAGS; - for (size_t i = 0; i < nstates; ++i) { - const dfa_state_t *s = dfa.states[i]; - if (s->fallback) { // see note [fallback states] - tags = dfa.tagpool.orl(tags, dfa.rules[s->rule].tags); - } - } - return tags; -} - size_t deduplicate_tags(dfa_t &dfa) { const size_t ntags = dfa.tags.size(); @@ -261,16 +241,14 @@ size_t deduplicate_tags(dfa_t &dfa) return 0; } - const size_t fallback = fallback_tags(dfa); - const size_t nstates = dfa.states.size(); size_t *live = new size_t[nstates](); - calc_live(dfa, fallback, live); + calc_live(dfa, live); mask_dead(dfa, live); bool *incompattbl = new bool[ntags * ntags](); - incompatibility_table(dfa, live, fallback, incompattbl); + incompatibility_table(dfa, live, incompattbl); size_t *represent = new size_t[ntags](); equivalence_classes(incompattbl, ntags, represent); diff --git a/re2c/test/tags/fallback1.i--tags.c b/re2c/test/tags/fallback1.i--tags.c new file mode 100644 index 00000000..42bddc1c --- /dev/null +++ b/re2c/test/tags/fallback1.i--tags.c @@ -0,0 +1,75 @@ +/* Generated by re2c */ +// This test demonstrates that fallback tags should be forward-propagated +// from final states, and that merging all possible fallback tags and +// bacward-propagating them from default transitions is too crude, +// inhibits tag interference where there is none and gets in the way +// of tag deduplication. + +// Three overlapping rules (constructed so that they trigger 'yyaccept' +// generation), 2nd and 3rd rules have variable-length tail so that their +// tags are not fixed. When 2nd rule is matched, 3rd rule's tag can be +// forgotten: we will never fall back to 3rd rule after matching another +// rule. + + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + long yytag1p; + YYCTXMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 5) YYFILL(5); + yych = *YYCURSOR; + yytag1p = (YYCURSOR - YYCTXMARKER); + switch (yych) { + case 'a': goto yy3; + default: goto yy2; + } +yy2: + { 3 (YYCTXMARKER + yytag1p) } +yy3: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'b': goto yy4; + default: goto yy2; + } +yy4: + yych = *++YYCURSOR; + switch (yych) { + case 'a': + yytag1p = (YYCURSOR - YYCTXMARKER); + goto yy6; + default: goto yy5; + } +yy5: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy2; + } else { + goto yy7; + } +yy6: + yyaccept = 1; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'b': goto yy8; + case 'c': goto yy9; + default: goto yy7; + } +yy7: + { 2 (YYCTXMARKER + yytag1p) } +yy8: + yych = *++YYCURSOR; + switch (yych) { + case 'a': goto yy10; + default: goto yy5; + } +yy9: + yych = *++YYCURSOR; + goto yy7; +yy10: + ++YYCURSOR; + { 1 } +} + +re2c: warning: line 17: rule matches empty string [-Wmatch-empty-string] diff --git a/re2c/test/tags/fallback1.i--tags.re b/re2c/test/tags/fallback1.i--tags.re new file mode 100644 index 00000000..6498b864 --- /dev/null +++ b/re2c/test/tags/fallback1.i--tags.re @@ -0,0 +1,19 @@ +// This test demonstrates that fallback tags should be forward-propagated +// from final states, and that merging all possible fallback tags and +// bacward-propagating them from default transitions is too crude, +// inhibits tag interference where there is none and gets in the way +// of tag deduplication. + +// Three overlapping rules (constructed so that they trigger 'yyaccept' +// generation), 2nd and 3rd rules have variable-length tail so that their +// tags are not fixed. When 2nd rule is matched, 3rd rule's tag can be +// forgotten: we will never fall back to 3rd rule after matching another +// rule. + +/*!re2c + + "ababa" { 1 } + "ab" @p "a" "c"? { 2 @p } + @p "a"? { 3 @p } + +*/ -- 2.50.0