From c6dbf271d18795cd5c9476eb3e9ec6ab9b80d4a2 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 25 Jan 2017 17:50:45 +0000 Subject: [PATCH] Fixed yet another case of disordered tags update and cursor increment. /* note [tag hoisting, skip hoisting and tunneling] * * Tag hoisting is simple: if all transitions have the same commands, * they can be hoisted out of conditional branches. * * Skip hoisting is only relevant with '--eager-skip' option. * Normally this option is off and skip is lazy: it happens after * transition to the next state, if this state is consuming. * However, with '--eager-skip' skip happens before transition to the next * state. Different transitions may disagree: some of them go to consuming * states, others don't. If they agree, skip can be hoisted (just like tags). * * '--eager-skip' makes tag hoisting more complicated, because now we have * to care about the type of automaton: lookahead TDFAs must skip after * writing tags, while non-lookahead TDFAs must skip before writing tags. * Therefore skip hoising cannot be done without tag hoisting in lookahead * TDFAs, and vice versa with non-lookahead TDFAs. * (Note that '--eager-skip' is implied by '--no-lookahead'). * * Tunneling splits base states in two parts: head and body. Body has all * the conditional branches (transitions on symbols), while head has just * one unconditional jump to body. * * Normally tag hoisting should go before tunneling: hoisting may add new * candidates to be merged by tunneling. However, with '--eager-skip' tag * hoisting is interwined with skip hoisting, and the latter needs to know * which states are consuming. This is not possible if tunneling is still * to be done, because it may turn consuming states into non-consuming ones. * Another option is to disallow splitting states with non-hoisted skip * in the presence of '--eager-skip' (this way skip hoisting wouldn't need * to know tunneling results), but it's much worse for tunneling. */ Found by slyfox's fuzzer. :) --- re2c/src/codegen/go.h | 2 + re2c/src/codegen/go_construct.cc | 17 +-- re2c/src/ir/adfa/adfa.h | 1 + re2c/src/ir/adfa/prepare.cc | 121 +++++++++++++++--- ...ag.i--tags--no-lookahead--input(custom).c} | 0 ...g.i--tags--no-lookahead--input(custom).re} | 0 ...ip_tags_disorder1.i--tags--no-lookahead.c} | 0 ...p_tags_disorder1.i--tags--no-lookahead.re} | 0 ...kip_tags_disorder2.i--tags--no-lookahead.c | 57 +++++++++ ...ip_tags_disorder2.i--tags--no-lookahead.re | 6 + .../tags/skip_tags_disorder3.i--eager-skip.c | 47 +++++++ .../tags/skip_tags_disorder3.i--eager-skip.re | 6 + ...kip_tags_disorder4.i--tags--no-lookahead.c | 66 ++++++++++ ...ip_tags_disorder4.i--tags--no-lookahead.re | 6 + 14 files changed, 299 insertions(+), 30 deletions(-) rename re2c/test/tags/{lost_save_command.i--tags--no-lookahead--input(custom).c => lost_yybackuptag.i--tags--no-lookahead--input(custom).c} (100%) rename re2c/test/tags/{lost_save_command.i--tags--no-lookahead--input(custom).re => lost_yybackuptag.i--tags--no-lookahead--input(custom).re} (100%) rename re2c/test/tags/{skip_before_tags.i--tags--no-lookahead.c => skip_tags_disorder1.i--tags--no-lookahead.c} (100%) rename re2c/test/tags/{skip_before_tags.i--tags--no-lookahead.re => skip_tags_disorder1.i--tags--no-lookahead.re} (100%) create mode 100644 re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c create mode 100644 re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re create mode 100644 re2c/test/tags/skip_tags_disorder3.i--eager-skip.c create mode 100644 re2c/test/tags/skip_tags_disorder3.i--eager-skip.re create mode 100644 re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c create mode 100644 re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re diff --git a/re2c/src/codegen/go.h b/re2c/src/codegen/go.h index dc433501..3abcfc50 100644 --- a/re2c/src/codegen/go.h +++ b/re2c/src/codegen/go.h @@ -233,6 +233,8 @@ struct Go } }; +bool consume(const State *s); + } // namespace re2c #endif // _RE2C_CODEGEN_GO_ diff --git a/re2c/src/codegen/go_construct.cc b/re2c/src/codegen/go_construct.cc index 7d648334..6699263f 100644 --- a/re2c/src/codegen/go_construct.cc +++ b/re2c/src/codegen/go_construct.cc @@ -14,7 +14,7 @@ namespace re2c static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x); -static bool consume(const State *s) +bool consume(const State *s) { switch (s->action.type) { default: return true; @@ -221,19 +221,6 @@ void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps) return; } - const Action::type_t a = from->action.type; - const bool need_skip = opts->eager_skip - && a != Action::RULE - && a != Action::ACCEPT; - - skip = need_skip; - for (uint32_t i = 0; skip && i < nSpans; ++i) { - // in non-lookahead TDFA cursor is incremented before setting tags, - // so tags don't affect hoisting skip statement out of conditional branches - skip = consume(span[i].to) - && (!opts->lookahead || span[i].tags == TCID0); - } - // initialize high (wide) spans uint32_t hSpans = 0; const Span * hspan = NULL; @@ -275,7 +262,7 @@ void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps) } const uint32_t dSpans = nSpans - hSpans - nBitmaps; - const bool part_skip = need_skip && !skip; + const bool part_skip = opts->eager_skip && !skip; if (opts->target == opt_t::DOT) { type = DOT; diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 5c7e4b2b..e64cbf09 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -104,6 +104,7 @@ private: void split (State *); void findBaseState (); void hoist_tags(); + void hoist_tags_and_skip(const opt_t *opts); void count_used_labels (std::set & used, label_t prolog, label_t start, bool force_start, bool fFlag) const; void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; void emit_dot(OutputFile &o, bool last_cond) const; diff --git a/re2c/src/ir/adfa/prepare.cc b/re2c/src/ir/adfa/prepare.cc index 551419d3..8bfe3b5d 100644 --- a/re2c/src/ir/adfa/prepare.cc +++ b/re2c/src/ir/adfa/prepare.cc @@ -99,6 +99,39 @@ void DFA::findBaseState() operator delete (span); } +/* note [tag hoisting, skip hoisting and tunneling] + * + * Tag hoisting is simple: if all transitions have the same commands, + * they can be hoisted out of conditional branches. + * + * Skip hoisting is only relevant with '--eager-skip' option. + * Normally this option is off and skip is lazy: it happens after + * transition to the next state, if this state is consuming. + * However, with '--eager-skip' skip happens before transition to the next + * state. Different transitions may disagree: some of them go to consuming + * states, others don't. If they agree, skip can be hoisted (just like tags). + * + * '--eager-skip' makes tag hoisting more complicated, because now we have + * to care about the type of automaton: lookahead TDFAs must skip after + * writing tags, while non-lookahead TDFAs must skip before writing tags. + * Therefore skip hoising cannot be done without tag hoisting in lookahead + * TDFAs, and vice versa with non-lookahead TDFAs. + * (Note that '--eager-skip' is implied by '--no-lookahead'). + * + * Tunneling splits base states in two parts: head and body. Body has all + * the conditional branches (transitions on symbols), while head has just + * one unconditional jump to body. + * + * Normally tag hoisting should go before tunneling: hoisting may add new + * candidates to be merged by tunneling. However, with '--eager-skip' tag + * hoisting is interwined with skip hoisting, and the latter needs to know + * which states are consuming. This is not possible if tunneling is still + * to be done, because it may turn consuming states into non-consuming ones. + * Another option is to disallow splitting states with non-hoisted skip + * in the presence of '--eager-skip' (this way skip hoisting wouldn't need + * to know tunneling results), but it's much worse for tunneling. + */ + void DFA::prepare(const opt_t *opts) { // create rule states @@ -149,9 +182,12 @@ void DFA::prepare(const opt_t *opts) default_state->action.set_accept(&accepts); } - // tag hoisting should be done before tunneling, but after - // binding default arcs (which may introduce new tags) - hoist_tags(); + // tag hoisting should be done after binding default arcs: + // (which may introduce new tags) + // see note [tag hoisting, skip hoisting and tunneling] + if (!opts->eager_skip) { + hoist_tags(); + } // split ``base'' states into two parts for (State * s = head; s; s = s->next) @@ -180,6 +216,11 @@ void DFA::prepare(const opt_t *opts) // find ``base'' state, if possible findBaseState(); + // see note [tag hoisting, skip hoisting and tunneling] + if (opts->eager_skip) { + hoist_tags_and_skip(opts); + } + for (State *s = head; s; s = s->next) { s->go.init(s, opts, bitmaps); } @@ -224,22 +265,72 @@ void DFA::calc_stats(uint32_t line, bool explicit_tags) void DFA::hoist_tags() { for (State * s = head; s; s = s->next) { - const size_t nsp = s->go.nSpans; - if (nsp > 0) { - Span *sp = s->go.span; - const tcid_t tags0 = sp[0].tags; - bool common_tags = tags0 != TCID0; - for (uint32_t i = 1; common_tags && i < nsp; ++i) { - common_tags &= sp[i].tags == tags0; + Span *span = s->go.span; + const size_t nspan = s->go.nSpans; + if (nspan == 0) continue; + + tcid_t tags = span[0].tags; + for (uint32_t i = 1; i < nspan; ++i) { + if (span[i].tags != tags) { + tags = TCID0; + break; } - if (common_tags) { - s->go.tags = tags0; - for (uint32_t i = 0; i < nsp; ++i) { - sp[i].tags = TCID0; - } + } + if (tags != TCID0) { + s->go.tags = tags; + for (uint32_t i = 0; i < nspan; ++i) { + span[i].tags = TCID0; } } } } +void DFA::hoist_tags_and_skip(const opt_t *opts) +{ + assert(opts->eager_skip); + + for (State * s = head; s; s = s->next) { + Span *span = s->go.span; + const size_t nspan = s->go.nSpans; + if (nspan == 0) continue; + + bool hoist_tags = true, hoist_skip = true; + + // do all spans agree on tags? + for (uint32_t i = 1; i < nspan; ++i) { + if (span[i].tags != span[0].tags) { + hoist_tags = false; + break; + } + } + + // do all spans agree on skip? + for (uint32_t i = 0; i < nspan; ++i) { + if (consume(span[i].to) != consume(span[0].to)) { + hoist_skip = false; + break; + } + } + + if (opts->lookahead) { + // skip must go after tags + hoist_skip &= hoist_tags; + } else { + // skip must go before tags + hoist_tags &= hoist_skip; + } + + // hoisting tags is possible + if (hoist_tags) { + s->go.tags = span[0].tags; + for (uint32_t i = 0; i < nspan; ++i) { + span[i].tags = TCID0; + } + } + + // hoisting skip is possible + s->go.skip = hoist_skip && consume(span[0].to); + } +} + } // namespace re2c diff --git a/re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).c b/re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).c similarity index 100% rename from re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).c rename to re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).c diff --git a/re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).re b/re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).re similarity index 100% rename from re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).re rename to re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).re diff --git a/re2c/test/tags/skip_before_tags.i--tags--no-lookahead.c b/re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.c similarity index 100% rename from re2c/test/tags/skip_before_tags.i--tags--no-lookahead.c rename to re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.c diff --git a/re2c/test/tags/skip_before_tags.i--tags--no-lookahead.re b/re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.re similarity index 100% rename from re2c/test/tags/skip_before_tags.i--tags--no-lookahead.re rename to re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.re diff --git a/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c new file mode 100644 index 00000000..0772d2fc --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c @@ -0,0 +1,57 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + yyt1 = YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR++; + yyt2 = YYCURSOR; + switch (yych) { + case 'a': goto yy4; + case 'b': goto yy7; + default: goto yy2; + } +yy2: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR++; + yyt1 = yyt2; + yyt2 = YYCURSOR; + switch (yych) { + case 'a': goto yy4; + case 'b': goto yy7; + default: goto yy2; + } +yy4: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy6; + default: + ++YYCURSOR; + yyt1 = yyt2; + yyt2 = YYCURSOR; + goto yy4; + } +yy6: + b = yyt1; + {} +yy7: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy9; + default: + ++YYCURSOR; + yyt1 = yyt2; + yyt2 = YYCURSOR; + goto yy7; + } +yy9: + a = yyt1; + {} +} + +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 4: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: tag 'a' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] +re2c: warning: line 4: tag 'b' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re new file mode 100644 index 00000000..1984e15e --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re @@ -0,0 +1,6 @@ +/*!re2c + +(@a [^a])* {} +(@b [^b])* {} + +*/ diff --git a/re2c/test/tags/skip_tags_disorder3.i--eager-skip.c b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.c new file mode 100644 index 00000000..b5db6b82 --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.c @@ -0,0 +1,47 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); + yych = *YYCURSOR; + switch (yych) { + case 'b': + ++YYCURSOR; + goto yy3; + default: goto yy2; + } +yy2: + {} +yy3: + yych = *(YYMARKER = YYCURSOR); + switch (yych) { + case 'c': goto yy2; + default: goto yy7; + } +yy4: + yych = *YYCURSOR; + switch (yych) { + case 'c': + ++YYCURSOR; + goto yy8; + default: goto yy5; + } +yy5: + YYCURSOR = YYMARKER; + goto yy2; +yy6: + YYMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; +yy7: + ++YYCURSOR; + switch (yych) { + case 'b': goto yy6; + case 'c': goto yy8; + default: goto yy4; + } +yy8: + {} +} + +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] diff --git a/re2c/test/tags/skip_tags_disorder3.i--eager-skip.re b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.re new file mode 100644 index 00000000..06a63624 --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.re @@ -0,0 +1,6 @@ +/*!re2c + +"b"* {} +"b"+ [^c] "c" {} + +*/ diff --git a/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c new file mode 100644 index 00000000..a74db64f --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c @@ -0,0 +1,66 @@ +/* Generated by re2c */ + +{ + YYCTYPE yych; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR++; + switch (yych) { + case 'c': goto yy4; + default: goto yy2; + } +yy2: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR++; + switch (yych) { + case 'c': + yyt1 = YYCURSOR; + goto yy5; + default: goto yy2; + } +yy4: + yych = *YYCURSOR++; + yyt1 = YYCURSOR; + switch (yych) { + case 'a': goto yy10; + case 'c': goto yy5; + default: goto yy8; + } +yy5: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy7; + default: + ++YYCURSOR; + yyt1 = YYCURSOR; + goto yy5; + } +yy7: + t = yyt1; + {} +yy8: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR++; + switch (yych) { + case 'a': goto yy10; + case 'c': + yyt1 = YYCURSOR; + goto yy5; + default: goto yy8; + } +yy10: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'c': goto yy12; + default: + ++YYCURSOR; + goto yy10; + } +yy12: + t = yyt1; + {} +} + +re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 3: tag 't' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags] diff --git a/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re new file mode 100644 index 00000000..7adbd4e3 --- /dev/null +++ b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re @@ -0,0 +1,6 @@ +/*!re2c + +[^c]* ([^a] @t)* {} +"c" [^c] @t [^c]* {} + +*/ -- 2.40.0