From: Ulya Trofimovich Date: Tue, 17 May 2016 16:11:40 +0000 (+0100) Subject: Fixed bug in tag liveness analyses (tags lost in loops). X-Git-Tag: 1.0~39^2~296 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a7a26d17cee825afcbe8659cb31a04a165172eed;p=re2c Fixed bug in tag liveness analyses (tags lost in loops). Example that revealed failure (added as test): (@p "ac")* "b" { @p } We used forward propagation: starting from initial state, walk DFA in deep-first order; in each state, add tags associated with rule or fallback set if necessary, then recurse to child states, then merge all child sets into this node's live set. This algorithm cannot propagate live tags in loops in cases when DFS visits loopbacks before non-looping paths (tags don't propagate into loopback states). The usual algorithm for liveness analyses doesn't use DFS; instead, it iterates on DFA states until fix point is reached (next iteration adds no changes to live set compared to the previous iteration). The fix applies this algorithm. A more efficient algorithm may be possible; one that uses backwards propagation instead of fix point (I'm not sure). --- diff --git a/re2c/src/ir/dfa/tag_deduplication.cc b/re2c/src/ir/dfa/tag_deduplication.cc index 9b64d8e2..3ef91e06 100644 --- a/re2c/src/ir/dfa/tag_deduplication.cc +++ b/re2c/src/ir/dfa/tag_deduplication.cc @@ -41,46 +41,43 @@ static bool dangling_arcs(const size_t *arcs, size_t narcs) * - There is a transition from S to some state S' (maybe equal to S) * that does not set T and T is alive in S'. */ -static void calc_live(const dfa_t &dfa, - const bool *fallback, - bool *visited, - bool *live, - size_t i) +static void calc_live(const dfa_t &dfa, const bool *fallback, bool *live) { - if (visited[i]) { - return; - } - - visited[i] = true; - dfa_state_t *s = dfa.states[i]; + const size_t nstates = dfa.states.size(); const size_t ntags = dfa.tags.size(); - // add tags before recursing to child states, - // so that tags propagate into loopbacks to this state - if (dangling_arcs(s->arcs, dfa.nchars)) { - if (s->rule != Rule::NONE) { - // final state, only rule tags are alive - add_tags_with_mask(&live[i * ntags], - dfa.rules[s->rule].tags, - dfa.tagpool[s->rule_tags], - ntags); - } else { - // transition to default state and dispatch on - // 'yyaccept': all fallback rules are potentially - // reachable, their tags are alive - // no mask (no rule implies no rule tags) - add_tags(&live[i * ntags], fallback, ntags); + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + if (dangling_arcs(s->arcs, dfa.nchars)) { + if (s->rule != Rule::NONE) { + // final state, only rule tags are alive + add_tags_with_mask(&live[i * ntags], + dfa.rules[s->rule].tags, + dfa.tagpool[s->rule_tags], + ntags); + } else { + // transition to default state and dispatch on + // 'yyaccept': all fallback rules are potentially + // reachable, their tags are alive + // no mask: no rule implies no rule tags + add_tags(&live[i * ntags], fallback, ntags); + } } } - for (size_t c = 0; c < dfa.nchars; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL) { - calc_live(dfa, fallback, visited, live, j); - add_tags_with_mask(&live[i * ntags], - &live[j * ntags], - dfa.tagpool[s->tags[c]], - ntags); + for (bool loop = true; loop;) { + loop = false; + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL) { + loop |= addcmp_tags_with_mask(&live[i * ntags], + &live[j * ntags], + dfa.tagpool[s->tags[c]], + ntags); + } + } } } } @@ -265,9 +262,8 @@ size_t deduplicate_tags(dfa_t &dfa, } const size_t nstates = dfa.states.size(); - bool *visited = new bool[nstates](); bool *live = new bool[nstates * ntags](); - calc_live(dfa, fbtags, visited, live, 0); + calc_live(dfa, fbtags, live); mask_dead(dfa, live); @@ -289,7 +285,6 @@ size_t deduplicate_tags(dfa_t &dfa, } delete[] fbtags; - delete[] visited; delete[] live; delete[] incompattbl; diff --git a/re2c/src/ir/tagpool.h b/re2c/src/ir/tagpool.h index c7e2aa7e..d5a8dca1 100644 --- a/re2c/src/ir/tagpool.h +++ b/re2c/src/ir/tagpool.h @@ -55,6 +55,18 @@ inline void add_tags_with_mask(bool *oldtags, const bool *newtags, } } +inline bool addcmp_tags_with_mask(bool *oldtags, const bool *newtags, + const bool *mask, size_t ntags) +{ + bool diff = false; + for (size_t i = 0; i < ntags; ++i) { + const bool old = oldtags[i]; + oldtags[i] |= newtags[i] & ~mask[i]; + diff |= old != oldtags[i]; + } + return diff; +} + } // namespace re2c #endif // _RE2C_IR_TAGPOOL_ diff --git a/re2c/test/tags/dedup5.i--tags.c b/re2c/test/tags/dedup5.i--tags.c new file mode 100644 index 00000000..5d0076e4 --- /dev/null +++ b/re2c/test/tags/dedup5.i--tags.c @@ -0,0 +1,111 @@ +/* Generated by re2c */ +// This test revealed a bug in liveness analyses that takes place +// during tag deduplication: in loops, live tags added by non-looping +// child paths failed to propagate into looping paths. + +// These two cases differ: in one case looping transition goes first (and tags are lost), +// in the other case non-looping transition goes first (and tags are not lost). + + +{ + YYCTYPE yych; + long yytag0p; + YYCTXMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch (yych) { + case 'a': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy4; + case 'b': goto yy5; + default: goto yy2; + } +yy2: + ++YYCURSOR; +yy3: + {} +yy4: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'c': goto yy7; + default: goto yy3; + } +yy5: + ++YYCURSOR; + { (YYCTXMARKER + yytag0p) } +yy7: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy9; + case 'b': goto yy5; + default: goto yy8; + } +yy8: + YYCURSOR = YYMARKER; + goto yy3; +yy9: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'c': goto yy7; + default: goto yy8; + } +} + + + +{ + YYCTYPE yych; + long yytag0p; + YYCTXMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy14; + case 'b': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy16; + default: goto yy12; + } +yy12: + ++YYCURSOR; +yy13: + {} +yy14: + ++YYCURSOR; + { (YYCTXMARKER + yytag0p) } +yy16: + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'c': goto yy17; + default: goto yy13; + } +yy17: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy14; + case 'b': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy19; + default: goto yy18; + } +yy18: + YYCURSOR = YYMARKER; + goto yy13; +yy19: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'c': goto yy17; + default: goto yy18; + } +} + diff --git a/re2c/test/tags/dedup5.i--tags.re b/re2c/test/tags/dedup5.i--tags.re new file mode 100644 index 00000000..97927040 --- /dev/null +++ b/re2c/test/tags/dedup5.i--tags.re @@ -0,0 +1,16 @@ +// This test revealed a bug in liveness analyses that takes place +// during tag deduplication: in loops, live tags added by non-looping +// child paths failed to propagate into looping paths. + +// These two cases differ: in one case looping transition goes first (and tags are lost), +// in the other case non-looping transition goes first (and tags are not lost). + +/*!re2c + (@p "ac")* "b" { @p } + * {} +*/ + +/*!re2c + (@p "bc")* "a" { @p } + * {} +*/