From: Ulya Trofimovich Date: Thu, 19 May 2016 12:04:11 +0000 (+0100) Subject: Tag deduplication should go before DFA minimization. X-Git-Tag: 1.0~39^2~289 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f23886af40f7bc2d5ee4048cde09e116bf7d72a8;p=re2c Tag deduplication should go before DFA minimization. Tags prevent minimization in some cases; deduplicating them before minimization gives better chances to the latter. --- diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index cd4b7615..94eafb69 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -18,7 +18,6 @@ namespace re2c DFA::DFA ( const dfa_t &dfa , const std::vector &fill - , const std::vector &fallback , Skeleton *skel , const charset_t &charset , const std::string &n @@ -82,6 +81,8 @@ DFA::DFA s->rule = t->rule; s->rule_tags = t->rule_tags; s->fill = fill[i]; + s->fallback = fallback_state(dfa, i); // see note [fallback states] + s->go.span = allocate(nchars); uint32_t j = 0; for (uint32_t c = 0; c < nchars; ++j) @@ -97,10 +98,6 @@ DFA::DFA } *p = NULL; - for (size_t i = 0; i < fallback.size(); ++i) { - i2s[fallback[i]]->fallback = true; - } - delete[] i2s; } diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 5434fbf7..433988fe 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -77,7 +77,6 @@ struct DFA DFA ( const dfa_t &dfa , const std::vector &fill - , const std::vector &fallback , Skeleton *skel , const charset_t &charset , const std::string &n diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index c380f889..803f20c5 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -64,21 +64,17 @@ static smart_ptr compile_rules( // but prior to any other DFA transformations Skeleton *skeleton = new Skeleton(dfa, cs, defrule, name, cond, line); + // try to minimize the number of tag variables + const size_t used_tags = deduplicate_tags(dfa); + minimization(dfa); // find YYFILL states and calculate 
argument to YYFILL std::vector fill; fillpoints(dfa, fill); - // see note [fallback states] - std::vector fallback; - fallback_states(dfa, fallback); - - // try to minimize the number of tag variables - const size_t used_tags = deduplicate_tags(dfa, fallback); - // ADFA stands for 'DFA with actions' - DFA *adfa = new DFA(dfa, fill, fallback, skeleton, cs, + DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line, used_tags); // see note [reordering DFA states] diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 56bebc7a..7b4f9e04 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -64,8 +64,8 @@ enum dfa_minimization_t void minimization(dfa_t &dfa); void fillpoints(const dfa_t &dfa, std::vector &fill); -void fallback_states(const dfa_t &dfa, std::vector &fallback); -size_t deduplicate_tags(dfa_t &dfa, const std::vector &fallback); +bool fallback_state(const dfa_t &dfa, size_t i); +size_t deduplicate_tags(dfa_t &dfa); } // namespace re2c diff --git a/re2c/src/ir/dfa/fallback.cc b/re2c/src/ir/dfa/fallback.cc index 94419036..5b78a579 100644 --- a/re2c/src/ir/dfa/fallback.cc +++ b/re2c/src/ir/dfa/fallback.cc @@ -5,27 +5,28 @@ namespace re2c /* note [fallback states] * - * Find accepting states that may be shadowed by other accepting - * states: when the short rule matches, lexer must try to match - * longer rules; if this attempt is unsuccessful it must fallback - * to the short match. + * Check if the given state is accepting, but may be shadowed + * by other accepting states: when the short rule matches, + * lexer must try to match longer rules; if this attempt is + * unsuccessful it must fallback to the short match. 
*/ -void fallback_states(const dfa_t &dfa, std::vector &fallback) +bool fallback_state(const dfa_t &dfa, size_t i) { - const size_t count = dfa.states.size(); - for (size_t i = 0; i < count; ++i) { - dfa_state_t *s = dfa.states[i]; - if (s->rule != Rule::NONE) { - for (size_t c = 0; c < dfa.nchars; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL - && dfa.states[j]->rule == Rule::NONE) { - fallback.push_back(i); - break; - } - } + dfa_state_t *s = dfa.states[i]; + + if (s->rule == Rule::NONE) { + return false; + } + + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL + && dfa.states[j]->rule == Rule::NONE) { + return true; } } + + return false; } } // namespace re2c diff --git a/re2c/src/ir/dfa/tag_deduplication.cc b/re2c/src/ir/dfa/tag_deduplication.cc index 6ecfc66b..88c8e779 100644 --- a/re2c/src/ir/dfa/tag_deduplication.cc +++ b/re2c/src/ir/dfa/tag_deduplication.cc @@ -239,28 +239,38 @@ static void patch_tags(dfa_t &dfa, const size_t *represent) } } -size_t deduplicate_tags(dfa_t &dfa, - const std::vector &fallback) +// see note [fallback states] +// fallback tags are all tags that belong to fallback rules +static size_t fallback_tags(const dfa_t &dfa) +{ + const size_t nstates = dfa.states.size(); + size_t tags = 0; + for (size_t i = 0; i < nstates; ++i) { + if (fallback_state(dfa, i)) { + const size_t r = dfa.states[i]->rule; + tags = dfa.tagpool.orl(tags, dfa.rules[r].tags); + } + } + return tags; +} + +size_t deduplicate_tags(dfa_t &dfa) { const size_t ntags = dfa.tags.size(); if (ntags == 0) { return 0; } - size_t fbtags = 0; - for (size_t i = 0; i < fallback.size(); ++i) { - const size_t r = dfa.states[fallback[i]]->rule; - fbtags = dfa.tagpool.orl(fbtags, dfa.rules[r].tags); - } + const size_t fallback = fallback_tags(dfa); const size_t nstates = dfa.states.size(); size_t *live = new size_t[nstates](); - calc_live(dfa, fbtags, live); + calc_live(dfa, fallback, live); mask_dead(dfa, live); bool 
*incompattbl = new bool[ntags * ntags](); - incompatibility_table(dfa, live, fbtags, incompattbl); + incompatibility_table(dfa, live, fallback, incompattbl); size_t *represent = new size_t[ntags](); equivalence_classes(incompattbl, ntags, represent); diff --git a/re2c/test/tags/dedup2.i--input(custom).c b/re2c/test/tags/dedup2.i--input(custom).c index d20eec34..a615a71d 100644 --- a/re2c/test/tags/dedup2.i--input(custom).c +++ b/re2c/test/tags/dedup2.i--input(custom).c @@ -2,7 +2,7 @@ { YYCTYPE yych; - if (YYLESSTHAN (3)) YYFILL(3); + if (YYLESSTHAN (2)) YYFILL(2); yych = YYPEEK (); switch (yych) { case 'a': goto yy4; @@ -16,10 +16,8 @@ yy2: {} yy4: YYSKIP (); - switch ((yych = YYPEEK ())) { - case 'a': goto yy10; - default: goto yy8; - } + yych = YYPEEK (); + goto yy8; YYRESTORECTX (); {} yy6: @@ -32,10 +30,6 @@ yy8: if (YYLESSTHAN (1)) YYFILL(1); yych = YYPEEK (); goto yy8; -yy10: - YYSKIP (); - yych = YYPEEK (); - goto yy8; } re2c: warning: line 2: unreachable rule [-Wunreachable-rules] diff --git a/re2c/test/tags/dedup2.i.c b/re2c/test/tags/dedup2.i.c index 4d37bb76..0550f646 100644 --- a/re2c/test/tags/dedup2.i.c +++ b/re2c/test/tags/dedup2.i.c @@ -2,7 +2,7 @@ { YYCTYPE yych; - if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); yych = *YYCURSOR; switch (yych) { case 'a': goto yy4; @@ -16,10 +16,8 @@ yy2: {} yy4: ++YYCURSOR; - switch ((yych = *YYCURSOR)) { - case 'a': goto yy10; - default: goto yy8; - } + yych = *YYCURSOR; + goto yy8; YYCURSOR = YYCTXMARKER; {} yy6: @@ -32,10 +30,6 @@ yy8: if (YYLIMIT <= YYCURSOR) YYFILL(1); yych = *YYCURSOR; goto yy8; -yy10: - ++YYCURSOR; - yych = *YYCURSOR; - goto yy8; } re2c: warning: line 2: unreachable rule [-Wunreachable-rules]