]> granicus.if.org Git - re2c/commitdiff
Fixed yet another case of disordered tags update and cursor increment.
authorUlya Trofimovich <skvadrik@gmail.com>
Wed, 25 Jan 2017 17:50:45 +0000 (17:50 +0000)
committerUlya Trofimovich <skvadrik@gmail.com>
Wed, 25 Jan 2017 17:50:45 +0000 (17:50 +0000)
/* note [tag hoisting, skip hoisting and tunneling]
 *
 * Tag hoisting is simple: if all transitions have the same commands,
 * they can be hoisted out of conditional branches.
 *
 * Skip hoisting is only relevant with '--eager-skip' option.
 * Normally this option is off and skip is lazy: it happens after
 * transition to the next state, if this state is consuming.
 * However, with '--eager-skip' skip happens before transition to the next
 * state. Different transitions may disagree: some of them go to consuming
 * states, others don't. If they agree, skip can be hoisted (just like tags).
 *
 * '--eager-skip' makes tag hoisting more complicated, because now we have
 * to care about the type of automaton: lookahead TDFAs must skip after
 * writing tags, while non-lookahead TDFAs must skip before writing tags.
 * Therefore skip hoising cannot be done without tag hoisting in lookahead
 * TDFAs, and vice versa with non-lookahead TDFAs.
 * (Note that '--eager-skip' is implied by '--no-lookahead').
 *
 * Tunneling splits base states in two parts: head and body. Body has all
 * the conditional branches (transitions on symbols), while head has just
 * one unconditional jump to body.
 *
 * Normally tag hoisting should go before tunneling: hoisting may add new
 * candidates to be merged by tunneling. However, with '--eager-skip' tag
 * hoisting is interwined with skip hoisting, and the latter needs to know
 * which states are consuming. This is not possible if tunneling is still
 * to be done, because it may turn consuming states into non-consuming ones.
 * Another option is to disallow splitting states with non-hoisted skip
 * in the presence of '--eager-skip' (this way skip hoisting wouldn't need
 * to know tunneling results), but it's much worse for tunneling.
 */

Found by slyfox's fuzzer. :)

14 files changed:
re2c/src/codegen/go.h
re2c/src/codegen/go_construct.cc
re2c/src/ir/adfa/adfa.h
re2c/src/ir/adfa/prepare.cc
re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).c [moved from re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).c with 100% similarity]
re2c/test/tags/lost_yybackuptag.i--tags--no-lookahead--input(custom).re [moved from re2c/test/tags/lost_save_command.i--tags--no-lookahead--input(custom).re with 100% similarity]
re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.c [moved from re2c/test/tags/skip_before_tags.i--tags--no-lookahead.c with 100% similarity]
re2c/test/tags/skip_tags_disorder1.i--tags--no-lookahead.re [moved from re2c/test/tags/skip_before_tags.i--tags--no-lookahead.re with 100% similarity]
re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c [new file with mode: 0644]
re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re [new file with mode: 0644]
re2c/test/tags/skip_tags_disorder3.i--eager-skip.c [new file with mode: 0644]
re2c/test/tags/skip_tags_disorder3.i--eager-skip.re [new file with mode: 0644]
re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c [new file with mode: 0644]
re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re [new file with mode: 0644]

index dc43350123d9f66142e5a0f3fc42d4358daaf4cd..3abcfc50ed45176f724c625f24ddf4c8f175bb73 100644 (file)
@@ -233,6 +233,8 @@ struct Go
        }
 };
 
+bool consume(const State *s);
+
 } // namespace re2c
 
 #endif // _RE2C_CODEGEN_GO_
index 7d648334149e2a6a94815ba569591e410ccba14a..6699263febe51f3e03bdc242222218b822dacf95 100644 (file)
@@ -14,7 +14,7 @@ namespace re2c
 
 static uint32_t unmap (Span * new_span, const Span * old_span, uint32_t old_nspans, const State * x);
 
-static bool consume(const State *s)
+bool consume(const State *s)
 {
        switch (s->action.type) {
                default: return true;
@@ -221,19 +221,6 @@ void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps)
                return;
        }
 
-       const Action::type_t a = from->action.type;
-       const bool need_skip = opts->eager_skip
-               && a != Action::RULE
-               && a != Action::ACCEPT;
-
-       skip = need_skip;
-       for (uint32_t i = 0; skip && i < nSpans; ++i) {
-               // in non-lookahead TDFA cursor is incremented before setting tags,
-               // so tags don't affect hoisting skip statement out of conditional branches
-               skip = consume(span[i].to)
-                       && (!opts->lookahead || span[i].tags == TCID0);
-       }
-
        // initialize high (wide) spans
        uint32_t hSpans = 0;
        const Span * hspan = NULL;
@@ -275,7 +262,7 @@ void Go::init(const State *from, const opt_t *opts, bitmaps_t &bitmaps)
        }
 
        const uint32_t dSpans = nSpans - hSpans - nBitmaps;
-       const bool part_skip = need_skip && !skip;
+       const bool part_skip = opts->eager_skip && !skip;
        if (opts->target == opt_t::DOT)
        {
                type = DOT;
index 5c7e4b2bb1a163d47316debdfc13a176a4fa5cb0..e64cbf09e01c2cca8207bf4ce57c55e4644856da 100644 (file)
@@ -104,6 +104,7 @@ private:
        void split (State *);
        void findBaseState ();
        void hoist_tags();
+       void hoist_tags_and_skip(const opt_t *opts);
        void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start, bool fFlag) const;
        void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
        void emit_dot(OutputFile &o, bool last_cond) const;
index 551419d3513f1f314c46e1ef69ccf1ec198396c4..8bfe3b5d85c2f063d9dca42700d1ba6248d39a0b 100644 (file)
@@ -99,6 +99,39 @@ void DFA::findBaseState()
        operator delete (span);
 }
 
+/* note [tag hoisting, skip hoisting and tunneling]
+ *
+ * Tag hoisting is simple: if all transitions have the same commands,
+ * they can be hoisted out of conditional branches.
+ *
+ * Skip hoisting is only relevant with '--eager-skip' option.
+ * Normally this option is off and skip is lazy: it happens after
+ * transition to the next state, if this state is consuming.
+ * However, with '--eager-skip' skip happens before transition to the next
+ * state. Different transitions may disagree: some of them go to consuming
+ * states, others don't. If they agree, skip can be hoisted (just like tags).
+ *
+ * '--eager-skip' makes tag hoisting more complicated, because now we have
+ * to care about the type of automaton: lookahead TDFAs must skip after
+ * writing tags, while non-lookahead TDFAs must skip before writing tags.
+ * Therefore skip hoising cannot be done without tag hoisting in lookahead
+ * TDFAs, and vice versa with non-lookahead TDFAs.
+ * (Note that '--eager-skip' is implied by '--no-lookahead').
+ *
+ * Tunneling splits base states in two parts: head and body. Body has all
+ * the conditional branches (transitions on symbols), while head has just
+ * one unconditional jump to body.
+ *
+ * Normally tag hoisting should go before tunneling: hoisting may add new
+ * candidates to be merged by tunneling. However, with '--eager-skip' tag
+ * hoisting is interwined with skip hoisting, and the latter needs to know
+ * which states are consuming. This is not possible if tunneling is still
+ * to be done, because it may turn consuming states into non-consuming ones.
+ * Another option is to disallow splitting states with non-hoisted skip
+ * in the presence of '--eager-skip' (this way skip hoisting wouldn't need
+ * to know tunneling results), but it's much worse for tunneling.
+ */
+
 void DFA::prepare(const opt_t *opts)
 {
        // create rule states
@@ -149,9 +182,12 @@ void DFA::prepare(const opt_t *opts)
                default_state->action.set_accept(&accepts);
        }
 
-       // tag hoisting should be done before tunneling, but after
-       // binding default arcs (which may introduce new tags)
-       hoist_tags();
+       // tag hoisting should be done after binding default arcs:
+       // (which may introduce new tags)
+       // see note [tag hoisting, skip hoisting and tunneling]
+       if (!opts->eager_skip) {
+               hoist_tags();
+       }
 
        // split ``base'' states into two parts
        for (State * s = head; s; s = s->next)
@@ -180,6 +216,11 @@ void DFA::prepare(const opt_t *opts)
        // find ``base'' state, if possible
        findBaseState();
 
+       // see note [tag hoisting, skip hoisting and tunneling]
+       if (opts->eager_skip) {
+               hoist_tags_and_skip(opts);
+       }
+
        for (State *s = head; s; s = s->next) {
                s->go.init(s, opts, bitmaps);
        }
@@ -224,22 +265,72 @@ void DFA::calc_stats(uint32_t line, bool explicit_tags)
 void DFA::hoist_tags()
 {
        for (State * s = head; s; s = s->next) {
-               const size_t nsp = s->go.nSpans;
-               if (nsp > 0) {
-                       Span *sp = s->go.span;
-                       const tcid_t tags0 = sp[0].tags;
-                       bool common_tags = tags0 != TCID0;
-                       for (uint32_t i = 1; common_tags && i < nsp; ++i) {
-                               common_tags &= sp[i].tags == tags0;
+               Span *span = s->go.span;
+               const size_t nspan = s->go.nSpans;
+               if (nspan == 0) continue;
+
+               tcid_t tags = span[0].tags;
+               for (uint32_t i = 1; i < nspan; ++i) {
+                       if (span[i].tags != tags) {
+                               tags = TCID0;
+                               break;
                        }
-                       if (common_tags) {
-                               s->go.tags = tags0;
-                               for (uint32_t i = 0; i < nsp; ++i) {
-                                       sp[i].tags = TCID0;
-                               }
+               }
+               if (tags != TCID0) {
+                       s->go.tags = tags;
+                       for (uint32_t i = 0; i < nspan; ++i) {
+                               span[i].tags = TCID0;
                        }
                }
        }
 }
 
+void DFA::hoist_tags_and_skip(const opt_t *opts)
+{
+       assert(opts->eager_skip);
+
+       for (State * s = head; s; s = s->next) {
+               Span *span = s->go.span;
+               const size_t nspan = s->go.nSpans;
+               if (nspan == 0) continue;
+
+               bool hoist_tags = true, hoist_skip = true;
+
+               // do all spans agree on tags?
+               for (uint32_t i = 1; i < nspan; ++i) {
+                       if (span[i].tags != span[0].tags) {
+                               hoist_tags = false;
+                               break;
+                       }
+               }
+
+               // do all spans agree on skip?
+               for (uint32_t i = 0; i < nspan; ++i) {
+                       if (consume(span[i].to) != consume(span[0].to)) {
+                               hoist_skip = false;
+                               break;
+                       }
+               }
+
+               if (opts->lookahead) {
+                       // skip must go after tags
+                       hoist_skip &= hoist_tags;
+               } else {
+                       // skip must go before tags
+                       hoist_tags &= hoist_skip;
+               }
+
+               // hoisting tags is possible
+               if (hoist_tags) {
+                       s->go.tags = span[0].tags;
+                       for (uint32_t i = 0; i < nspan; ++i) {
+                               span[i].tags = TCID0;
+                       }
+               }
+
+               // hoisting skip is possible
+               s->go.skip = hoist_skip && consume(span[0].to);
+       }
+}
+
 } // namespace re2c
diff --git a/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.c
new file mode 100644 (file)
index 0000000..0772d2f
--- /dev/null
@@ -0,0 +1,57 @@
+/* Generated by re2c */
+
+{
+       YYCTYPE yych;
+       yyt1 = YYCURSOR;
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR++;
+       yyt2 = YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy4;
+       case 'b':       goto yy7;
+       default:        goto yy2;
+       }
+yy2:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR++;
+       yyt1 = yyt2;
+       yyt2 = YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy4;
+       case 'b':       goto yy7;
+       default:        goto yy2;
+       }
+yy4:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'b':       goto yy6;
+       default:
+               ++YYCURSOR;
+               yyt1 = yyt2;
+               yyt2 = YYCURSOR;
+               goto yy4;
+       }
+yy6:
+       b = yyt1;
+       {}
+yy7:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy9;
+       default:
+               ++YYCURSOR;
+               yyt1 = yyt2;
+               yyt2 = YYCURSOR;
+               goto yy7;
+       }
+yy9:
+       a = yyt1;
+       {}
+}
+
+re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 4: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 3: tag 'a' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags]
+re2c: warning: line 4: tag 'b' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags]
diff --git a/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re b/re2c/test/tags/skip_tags_disorder2.i--tags--no-lookahead.re
new file mode 100644 (file)
index 0000000..1984e15
--- /dev/null
@@ -0,0 +1,6 @@
+/*!re2c
+
+(@a [^a])* {}
+(@b [^b])* {}
+
+*/
diff --git a/re2c/test/tags/skip_tags_disorder3.i--eager-skip.c b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.c
new file mode 100644 (file)
index 0000000..b5db6b8
--- /dev/null
@@ -0,0 +1,47 @@
+/* Generated by re2c */
+
+{
+       YYCTYPE yych;
+       if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'b':
+               ++YYCURSOR;
+               goto yy3;
+       default:        goto yy2;
+       }
+yy2:
+       {}
+yy3:
+       yych = *(YYMARKER = YYCURSOR);
+       switch (yych) {
+       case 'c':       goto yy2;
+       default:        goto yy7;
+       }
+yy4:
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'c':
+               ++YYCURSOR;
+               goto yy8;
+       default:        goto yy5;
+       }
+yy5:
+       YYCURSOR = YYMARKER;
+       goto yy2;
+yy6:
+       YYMARKER = YYCURSOR;
+       if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR;
+yy7:
+       ++YYCURSOR;
+       switch (yych) {
+       case 'b':       goto yy6;
+       case 'c':       goto yy8;
+       default:        goto yy4;
+       }
+yy8:
+       {}
+}
+
+re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string]
diff --git a/re2c/test/tags/skip_tags_disorder3.i--eager-skip.re b/re2c/test/tags/skip_tags_disorder3.i--eager-skip.re
new file mode 100644 (file)
index 0000000..06a6362
--- /dev/null
@@ -0,0 +1,6 @@
+/*!re2c
+
+"b"* {}
+"b"+ [^c] "c" {}
+
+*/
diff --git a/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.c
new file mode 100644 (file)
index 0000000..a74db64
--- /dev/null
@@ -0,0 +1,66 @@
+/* Generated by re2c */
+
+{
+       YYCTYPE yych;
+       if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+       yych = *YYCURSOR++;
+       switch (yych) {
+       case 'c':       goto yy4;
+       default:        goto yy2;
+       }
+yy2:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR++;
+       switch (yych) {
+       case 'c':
+               yyt1 = YYCURSOR;
+               goto yy5;
+       default:        goto yy2;
+       }
+yy4:
+       yych = *YYCURSOR++;
+       yyt1 = YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy10;
+       case 'c':       goto yy5;
+       default:        goto yy8;
+       }
+yy5:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'a':       goto yy7;
+       default:
+               ++YYCURSOR;
+               yyt1 = YYCURSOR;
+               goto yy5;
+       }
+yy7:
+       t = yyt1;
+       {}
+yy8:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR++;
+       switch (yych) {
+       case 'a':       goto yy10;
+       case 'c':
+               yyt1 = YYCURSOR;
+               goto yy5;
+       default:        goto yy8;
+       }
+yy10:
+       if (YYLIMIT <= YYCURSOR) YYFILL(1);
+       yych = *YYCURSOR;
+       switch (yych) {
+       case 'c':       goto yy12;
+       default:
+               ++YYCURSOR;
+               goto yy10;
+       }
+yy12:
+       t = yyt1;
+       {}
+}
+
+re2c: warning: line 3: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 3: tag 't' is non-deterministic and induces 2 parallel instances [-Wnondeterministic-tags]
diff --git a/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re b/re2c/test/tags/skip_tags_disorder4.i--tags--no-lookahead.re
new file mode 100644 (file)
index 0000000..7adbd4e
--- /dev/null
@@ -0,0 +1,6 @@
+/*!re2c
+
+[^c]* ([^a] @t)* {}
+"c" [^c] @t [^c]* {}
+
+*/