Reduce all final versions of the given tag to one common version.
authorUlya Trofimovich <skvadrik@gmail.com>
Mon, 14 Nov 2016 17:15:30 +0000 (17:15 +0000)
committerUlya Trofimovich <skvadrik@gmail.com>
Mon, 14 Nov 2016 17:19:26 +0000 (17:19 +0000)
Tag may have different final versions in different accepting states
(currently either fallback version of the main version). Now we
reduce all final versions to one (this takes one additional 'copy'
command for normal tags and none for tags that have fallback copy).

23 files changed:
re2c/src/ir/adfa/adfa.cc
re2c/src/ir/adfa/adfa.h
re2c/src/ir/adfa/prepare.cc
re2c/src/ir/dfa/cfg/cfg.cc
re2c/src/ir/dfa/cfg/cfg.h
re2c/src/ir/dfa/cfg/dce.cc
re2c/src/ir/dfa/cfg/interfere.cc
re2c/src/ir/dfa/cfg/liveanal.cc
re2c/src/ir/dfa/cfg/normalize.cc
re2c/src/ir/dfa/cfg/optimize.cc
re2c/src/ir/dfa/cfg/rename.cc
re2c/src/ir/dfa/cfg/varalloc.cc
re2c/src/ir/dfa/closure.cc
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/dfa.h
re2c/src/ir/dfa/fallback_tags.cc
re2c/src/ir/tcmd.cc
re2c/src/ir/tcmd.h
re2c/test/tags/copy_coalescing1.i--tags.c
re2c/test/tags/fallback3.i--tags.c
re2c/test/tags/fallback4.i--tags.c
re2c/test/tags/fallback5.i--tags.c
re2c/test/tags/interference.i--tags.c

index efcfef5417df10b20237c129ff35d4098074b7ae..0bf795df8b2c626169d931a4c69146e98c5fbc3e 100644 (file)
@@ -65,6 +65,7 @@ DFA::DFA
 
                s->rule = t->rule;
                s->rule_tags = t->tcid[dfa.nchars];
+               s->fall_tags = t->tcid[dfa.nchars + 1];
                s->fill = fill[i];
                s->fallback = t->fallback; // see note [fallback states]
 
index 0baae8d281bce66cfe19d4c7996b9a6f7f20888d..f676cd77643315bee903b8a939bb18b113d7c19a 100644 (file)
@@ -31,6 +31,7 @@ struct State
 
        size_t rule;
        tcid_t rule_tags;
+       tcid_t fall_tags;
        bool isBase;
        Go go;
        Action action;
@@ -42,6 +43,7 @@ struct State
                , fallback (false)
                , rule (Rule::NONE)
                , rule_tags (TCID0)
+               , fall_tags (TCID0)
                , isBase (false)
                , go ()
                , action ()
index 8398398f768f20155ce68788d46479455b035d4d..d6afd06ccfb2ecf0cb8b2ca7cfcd596708a3f452 100644 (file)
@@ -21,6 +21,7 @@ void DFA::split(State *s)
        move->fill = s->fill;
        move->go = s->go;
        move->rule_tags = s->rule_tags;
+       move->fall_tags = s->fall_tags;
        s->rule = Rule::NONE;
        s->go.nSpans = 1;
        s->go.span = allocate<Span> (1);
@@ -113,14 +114,10 @@ void DFA::prepare ()
                                rule2state[s->rule] = n;
                                addState(n, s);
                        }
-
-                       // exclude 'copy' commands, they are for fallback paths only
-                       const tcid_t id = tcpool.insert(tcpool[s->rule_tags].save, NULL);
-
                        for (uint32_t i = 0; i < s->go.nSpans; ++i) {
                                if (!s->go.span[i].to) {
                                        s->go.span[i].to = rule2state[s->rule];
-                                       s->go.span[i].tags = id;
+                                       s->go.span[i].tags = s->rule_tags;
                                }
                        }
                }
@@ -148,7 +145,7 @@ void DFA::prepare ()
        if (default_state) {
                for (State *s = head; s; s = s->next) {
                        if (s->fallback) {
-                               const std::pair<const State*, tcid_t> acc(rule2state[s->rule], s->rule_tags);
+                               const std::pair<const State*, tcid_t> acc(rule2state[s->rule], s->fall_tags);
                                s->action.set_save(accepts.find_or_add(acc));
                        }
                }
index 0c5bcf31f403d45416bf11f0b74ea3e72f6f445b..450ff7f90bab7ce17cbcbd1db1eb702c9aeb4e00 100644 (file)
@@ -5,8 +5,8 @@
 namespace re2c
 {
 
-static cfg_ix_t map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb);
-static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbblock);
+static void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb, cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall);
+static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbbfin, cfg_ix_t nbbfall);
 static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, tcmd_t *cmd, tagver_t *use);
 static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
 static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x);
@@ -14,79 +14,100 @@ static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_i
 cfg_t::cfg_t(dfa_t &a)
        : dfa(a)
        , bblocks(NULL)
-       , nbblock(0)
+       , nbbarc(0)
+       , nbbfin(0)
+       , nbbfall(0)
 {
        const size_t
                nstate = dfa.states.size(),
-               narc = nstate * dfa.nchars;
-       cfg_ix_t *arc2bb = new cfg_ix_t[narc + nstate];
+               nsym = dfa.nchars;
+       cfg_ix_t *arc2bb = new cfg_ix_t[nstate * (nsym + 2)];
 
-       nbblock = map_arcs_to_bblocks(dfa, arc2bb);
-       bblocks = create_bblocks(dfa, arc2bb, nbblock);
+       map_arcs_to_bblocks(dfa, arc2bb, nbbarc, nbbfin, nbbfall);
+       bblocks = create_bblocks(dfa, arc2bb, nbbfin, nbbfall);
 
        delete[] arc2bb;
 }
 
-cfg_ix_t map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb)
+void map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb,
+       cfg_ix_t &nbbarc, cfg_ix_t &nbbfin, cfg_ix_t &nbbfall)
 {
        const size_t
                nstate = dfa.states.size(),
                nsym = dfa.nchars;
 
        // first bblock is CFG root: it has no counterpart in DFA
-       cfg_ix_t nbblock = 1;
+       cfg_ix_t nbb = 1;
 
+       // bblocks for tagged transitions
        for (size_t i = 0; i < nstate; ++i) {
-               const dfa_state_t *s = dfa.states[i];
-
-               // bblocks for tagged transitions in DFA
-               for (size_t c = 0; c < nsym; ++c) {
-                       const tcmd_t &cmd = s->tcmd[c];
-                       *arc2bb++ = cmd.save || cmd.copy ? nbblock++ : 0;
+               const tcmd_t *c = dfa.states[i]->tcmd, *f = c + nsym;
+               for (; c < f; ++c) {
+                       *arc2bb++ = c->empty() ? 0 : nbb++;
                }
+       }
+       nbbarc = nbb;
 
-               // bblocks for final DFA states with rules that have tags
-               *arc2bb++ = s->rule != Rule::NONE
-                       && dfa.rules[s->rule].tags != ZERO_TAGS
-                       ? nbblock++ : 0;
+       // bblock for final tagged epsilon-transition
+       for (size_t i = 0; i < nstate; ++i) {
+               const tcmd_t &f = dfa.states[i]->tcmd[nsym];
+               *arc2bb++ = f.empty() ? 0 : nbb++;
        }
+       nbbfin = nbb;
 
-       return nbblock;
+       // bblock for fallback tagged epsilon-transition
+       for (size_t i = 0; i < nstate; ++i) {
+               const dfa_state_t *s = dfa.states[i];
+               // (check final tags: fallback tags may be empty)
+               *arc2bb++ = s->fallback && !s->tcmd[nsym].empty() ? nbb++ : 0;
+       }
+       nbbfall = nbb;
 }
 
-cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbblock)
+cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb,
+       cfg_ix_t nbbfin, cfg_ix_t nbbfall)
 {
        const size_t
                nstate = dfa.states.size(),
                nsym = dfa.nchars;
        const cfg_ix_t *a2b = arc2bb;
-       cfg_ix_t *succb = new cfg_ix_t[nbblock], *succe;
+       cfg_ix_t *succb = new cfg_ix_t[nbbfin], *succe;
        bool *been = new bool[nstate];
 
-       cfg_bb_t *bblocks = new cfg_bb_t[nbblock], *b = bblocks;
+       cfg_bb_t *bblocks = new cfg_bb_t[nbbfall], *b = bblocks;
 
        // root bblock
        std::fill(been, been + nstate, false);
        successors(dfa, arc2bb, been, succe = succb, 0);
-       basic_block(b++, succb, succe, new tcmd_t, TAGVER_ZERO);
+       basic_block(b++, succb, succe, new tcmd_t, NULL);
 
+       // transition bblocks
        for (size_t i = 0; i < nstate; ++i) {
                const dfa_state_t *s = dfa.states[i];
-
-               // transition bblocks
                for (size_t c = 0; c < nsym; ++c) {
                        if (*a2b++ != 0) {
                                std::fill(been, been + nstate, false);
                                successors(dfa, arc2bb, been, succe = succb, s->arcs[c]);
-                               basic_block(b++, succb, succe, &s->tcmd[c], TAGVER_ZERO);
+                               basic_block(b++, succb, succe, &s->tcmd[c], NULL);
                        }
                }
+       }
 
-               // final bblocks
+       // final bblocks
+       for (size_t i = 0; i < nstate; ++i) {
                if (*a2b++ != 0) {
+                       const dfa_state_t *s = dfa.states[i];
+                       basic_block(b++, NULL, NULL, &s->tcmd[nsym], dfa.rules[s->rule].tags);
+               }
+       }
+
+       // fallback bblocks
+       for (size_t i = 0; i < nstate; ++i) {
+               if (*a2b++ != 0) {
+                       const dfa_state_t *s = dfa.states[i];
                        std::fill(been, been + nstate, false);
                        fallback(dfa, arc2bb, been, succe = succb, i);
-                       basic_block(b++, succb, succe, &s->tcmd[nsym], dfa.rules[s->rule].tags);
+                       basic_block(b++, succb, succe, &s->tcmd[nsym + 1], dfa.rules[s->rule].tags);
                }
        }
 
@@ -116,9 +137,10 @@ void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
        been[x] = true;
 
        const size_t
+               nstate = dfa.states.size(),
                nsym = dfa.nchars,
                *a = dfa.states[x]->arcs;
-       const cfg_ix_t *a2b = &arc2bb[x * (nsym + 1)];
+       const cfg_ix_t *a2b = &arc2bb[x * nsym];
 
        for (size_t c = 0; c < nsym; ++c) {
                const cfg_ix_t b = a2b[c];
@@ -129,9 +151,9 @@ void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
                }
        }
 
-       const cfg_ix_t b = a2b[nsym];
-       if (b != 0) {
-               *succ++ = b;
+       const cfg_ix_t f = arc2bb[nstate * nsym + x];
+       if (f != 0) {
+               *succ++ = f;
        }
 }
 
@@ -147,7 +169,7 @@ void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
        const size_t
                nsym = dfa.nchars,
                *a = dfa.states[x]->arcs;
-       const cfg_ix_t *a2b = &arc2bb[x * (nsym + 1)];
+       const cfg_ix_t *a2b = &arc2bb[x * nsym];
 
        for (size_t c = 0; c < nsym; ++c) {
                const size_t y = a[c];
@@ -163,7 +185,7 @@ void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been,
 
 cfg_t::~cfg_t()
 {
-       cfg_bb_t *b = bblocks, *e = b + nbblock;
+       cfg_bb_t *b = bblocks, *e = b + nbbfall;
 
        delete b->cmd;
 
index 26489ccd282eac9d2f40ecb8ee20fe267baedf64..d9d69be8bef38ae04dc1552e1986d4829c08b3c2 100644 (file)
@@ -21,7 +21,9 @@ struct cfg_t
 {
        dfa_t &dfa;
        cfg_bb_t *bblocks;
-       cfg_ix_t nbblock;
+       cfg_ix_t nbbarc;
+       cfg_ix_t nbbfin;
+       cfg_ix_t nbbfall;
 
        explicit cfg_t(dfa_t &a);
        ~cfg_t();
index 60fea7ced1d465a68819ad3db843ef77159ce60c..78bc517fba80b6d22654dac4b0c0da232871f93c 100644 (file)
@@ -6,7 +6,8 @@ namespace re2c
 void cfg_t::dead_code_elimination(cfg_t &cfg, const bool *live)
 {
        const tagver_t nver = cfg.dfa.maxtagver + 1;
-       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+       // final and fallback tags can't be dead by construction
+       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbarc;
 
        for (; b < e; ++b, live += nver) {
                for (tagsave_t *s, **ps = &b->cmd->save; (s = *ps);) {
@@ -16,8 +17,6 @@ void cfg_t::dead_code_elimination(cfg_t &cfg, const bool *live)
                                ps = &s->next;
                        }
                }
-               // rule tags and copy tags can't be dead by construction
-               // (copy tags are only used for fallback tags)
        }
 }
 
index 8f87838060327635a318edde0ea24a4ae215b374..939d8eb40a1461abe2371e4dd336b0e0ac7f1c1b 100644 (file)
@@ -12,7 +12,7 @@ void cfg_t::interference(const cfg_t &cfg, const bool *live, bool *interf)
        const size_t nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1;
        bool *buf1 = new bool[nver];
        bool *buf2 = new bool[nver];
-       const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+       const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfin;
 
        memset(interf, 0, nver * nver * sizeof(bool));
        for (; b < e; ++b, live += nver) {
index 825888fa33147d5fe963c1230688b4a15027e4f8..03ea6413fb2bf80760eb7ac664709d278e1887ea 100644 (file)
@@ -8,7 +8,6 @@ namespace re2c
 void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
 {
        const size_t
-               nbb = cfg.nbblock,
                nver = static_cast<size_t>(cfg.dfa.maxtagver) + 1,
                ntag = cfg.dfa.tags.size();
        bool *buf1 = new bool[nver];
@@ -27,14 +26,16 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
         * new versions.
         */
 
-       memset(live, 0, nbb * nver * sizeof(bool));
-       for (cfg_ix_t i = 0; i < nbb; ++i) {
-               const tagver_t *use = cfg.bblocks[i].use;
-               if (!use) continue;
-
+       memset(live, 0, cfg.nbbfin * nver * sizeof(bool));
+       for (cfg_ix_t i = cfg.nbbarc; i < cfg.nbbfin; ++i) {
+               const cfg_bb_t *b = cfg.bblocks + i;
                bool *l = &live[i * nver];
+
+               // all final bblocks have USE tags, but no successors
+               assert(b->use && b->succb == b->succe);
+
                for (size_t t = 0; t < ntag; ++t) {
-                       const tagver_t u = use[t];
+                       const tagver_t u = b->use[t];
                        if (u != TAGVER_ZERO) {
                                l[u] = true;
                        }
@@ -44,23 +45,32 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
        for (bool loop = true; loop;) {
                loop = false;
 
-               for (cfg_ix_t i = 0; i < nbb; ++i) {
+               for (cfg_ix_t i = 0; i < cfg.nbbarc; ++i) {
                        const cfg_bb_t *b = cfg.bblocks + i;
-                       if (b->use) continue;
-
                        bool *old = &live[i * nver];
+
+                       // transition bblocks have no USE tags
+                       assert(!b->use);
+
                        memcpy(buf1, old, nver * sizeof(bool));
                        for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
                                const bool *l = &live[*j * nver];
+                               const tcmd_t *cmd = cfg.bblocks[*j].cmd;
                                memcpy(buf2, l, nver * sizeof(bool));
 
-                               const tagsave_t *p = cfg.bblocks[*j].cmd->save;
-                               for (; p; p = p->next) {
+                               for (const tagsave_t *p = cmd->save; p; p = p->next) {
                                        buf2[p->ver] = false;
                                }
-
-                               // copy tags are only used for fallback tags,
-                               // their liveness is handled in a special way
+                               for (const tagcopy_t *p = cmd->copy; p; p = p->next) {
+                                       if (l[p->lhs]) {
+                                               buf2[p->lhs] = false;
+                                       }
+                               }
+                               for (const tagcopy_t *p = cmd->copy; p; p = p->next) {
+                                       if (l[p->lhs]) {
+                                               buf2[p->rhs] = true;
+                                       }
+                               }
 
                                for (size_t v = 0; v < nver; ++v) {
                                        buf1[v] |= buf2[v];
@@ -86,9 +96,11 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
         * but still we should prevent it from merging with other tags
         * (otherwise it may become overwritten).
         */
-       for (cfg_ix_t i = 0; i < nbb; ++i) {
+       for (cfg_ix_t i = cfg.nbbfin; i < cfg.nbbfall; ++i) {
                const cfg_bb_t *b = cfg.bblocks + i;
-               if (!b->use) continue;
+
+               // all fallback bblocks have USE tags
+               assert(b->use);
 
                memset(buf1, 0, nver * sizeof(bool));
                for (size_t t = 0; t < ntag; ++t) {
@@ -100,15 +112,10 @@ void cfg_t::liveness_analysis(const cfg_t &cfg, bool *live)
                for (const tagsave_t *p = b->cmd->save; p; p = p->next) {
                        buf1[p->ver] = false;
                }
-
-               // in rule tags copies are swapped: LHS is the origin, RHS is backup
                for (const tagcopy_t *p = b->cmd->copy; p; p = p->next) {
                        buf1[p->lhs] = false;
                        buf1[p->rhs] = true;
                }
-
-               // final bblock has no successors, instead it has the list
-               // of all bblocks reachable by non-accepting DFA paths
                for (cfg_ix_t *j = b->succb; j < b->succe; ++j) {
                        bool *liv = &live[*j * nver];
                        for (size_t v = 0; v < nver; ++v) {
index 91c54b50c97e5d6f05eacc1763dc085cb9d66b68..4fc6afa101ccb8cee3d41463f066d336c2d36173 100644 (file)
@@ -16,7 +16,7 @@ template<typename cmd_t> void normalize(cmd_t *cmd);
 
 void cfg_t::normalization(cfg_t &cfg)
 {
-       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
        for (; b < e; ++b) {
                normalize(b->cmd->save);
                normalize(b->cmd->copy);
index d48fa5eba957eaf6435f088b0d16269f048f0617..096b15b168f312d0b1f4582db9884edd2a2230b7 100644 (file)
@@ -12,7 +12,7 @@ void optimize_tags(dfa_t &dfa)
                cfg_t cfg(dfa);
 
                const size_t nver = static_cast<size_t>(dfa.maxtagver) + 1;
-               bool *live = new bool[cfg.nbblock * nver];
+               bool *live = new bool[cfg.nbbfin * nver];
                bool *interf = new bool[nver * nver];
                tagver_t *ver2new = new tagver_t[nver];
 
@@ -54,8 +54,9 @@ void freeze_tags(dfa_t &dfa)
                dfa_state_t *s = dfa.states[i];
                const tcmd_t
                        *cmd = s->tcmd,
-                       *const fin = cmd + nsym;
-               tcid_t *id = s->tcid = new tcid_t[nsym + 1];
+                       *const fin = cmd + nsym,
+                       *const fall = fin + 1;
+               tcid_t *id = s->tcid = new tcid_t[nsym + 2];
 
                // transition commands
                for(; cmd < fin; ++cmd) {
@@ -65,6 +66,9 @@ void freeze_tags(dfa_t &dfa)
                // final epsilon-transition command
                *id++ = pool.insert(fin->save, fin->copy);
 
+               // fallback epsilon-transition command
+               *id++ = pool.insert(fall->save, fall->copy);
+
                delete[] s->tcmd;
                s->tcmd = NULL;
        }
index be797504b2bbf0838e6ba9c7d20080829a227ff3..e2924537fcf214dc547724f2ab451cc4ea874628 100644 (file)
@@ -9,7 +9,7 @@ void cfg_t::renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver)
        if (oldmax == maxver) return;
        oldmax = maxver;
 
-       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+       cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
        for (; b < e; ++b) {
 
                // tag versions in save commands
index f710f4c64d33244ca2aeb3d84c451ee581a203f4..6e1735707dbada368bb460a9dad2a71e07febb25 100644 (file)
@@ -30,7 +30,7 @@ tagver_t cfg_t::variable_allocation(const cfg_t &cfg, const bool *interf,
        std::fill(repr, repr + nver, END);
 
        // copy coalescing: for each command X = Y, try to merge X and Y
-       const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock;
+       const cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbbfall;
        for (; b < e; ++b) {
                for (const tagcopy_t *p = b->cmd->copy; p; p = p->next) {
                        x = p->lhs;
index af47a8b8ee0cdd5c601457efb07f980c669c6ed7..fc044c1e67670ec8b7b0e46e925c6631d4931f16 100644 (file)
@@ -65,7 +65,7 @@ void closure_one(closure_t &clos, Tagpool &tagpool,
                case nfa_state_t::TAG: {
                        const size_t t = n->tag.info;
                        const tagver_t old = tags[t];
-                       tags[t] = static_cast<tagver_t>(t + 1);
+                       tags[t] = static_cast<tagver_t>(t + 1 + tagpool.ntags);
                        closure_one(clos, tagpool, n->tag.out, tags, badtags);
                        tags[t] = old;
                        break;
index 4e3f1e4aae486916e7cb87a67256684ad3f9d1fb..a145ca157b799f41664e304639bc8e4993450b2a 100644 (file)
@@ -83,7 +83,7 @@ dfa_t::dfa_t(const nfa_t &nfa,
                        f = std::find_if(clos0.begin(), e, clos_t::final);
                if (f != e) {
                        s->rule = f->state->rule;
-                       s->tcmd[nchars].save = tcpool.conv_to_save(tagpool[f->tagidx], ntag);
+                       s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[f->tagidx], rules[s->rule].tags, ntag);
                }
 
                // for each alphabet symbol, build tagged epsilon-closure
@@ -102,9 +102,10 @@ dfa_t::dfa_t(const nfa_t &nfa,
 
 tagver_t vartag_maxver(const std::valarray<Tag> &tags)
 {
-       for (size_t t = tags.size(); t > 0; --t) {
+       const size_t ntag = tags.size();
+       for (size_t t = ntag; t > 0; --t) {
                if (tags[t - 1].type == Tag::VAR) {
-                       return static_cast<tagver_t>(t);
+                       return static_cast<tagver_t>(ntag + t);
                }
        }
        return 0;
index 61d81f1dce3d85cd7ae15452372af3eed90d2364..bf0202351cbd19c5d10b18d705424890dcb73215 100644 (file)
@@ -29,7 +29,7 @@ struct dfa_state_t
 
        explicit dfa_state_t(size_t nchars)
                : arcs(new size_t[nchars])
-               , tcmd(new tcmd_t[nchars + 1]) // +1 for final epsilon-transition
+               , tcmd(new tcmd_t[nchars + 2]) // +2 for final and fallback epsilon-transitions
                , tcid(NULL)
                , rule(Rule::NONE)
                , fallthru(false)
index 5441e19e47c6de5a9d7db4e2bab9fc8bda656f90..9f109923653fcbf6187b56ff0b9aacf113a59738 100644 (file)
@@ -57,55 +57,50 @@ void find_overwritten_tags(const dfa_t &dfa, size_t state,
 // note [fallback states]
 void insert_fallback_tags(dfa_t &dfa)
 {
-       tagver_t maxver = dfa.maxtagver;
        tcpool_t &pool = dfa.tcpool;
        const size_t
                nstates = dfa.states.size(),
                nsym = dfa.nchars,
-               ntags = dfa.tags.size(),
-               nver = static_cast<size_t>(maxver) + 1;
+               nver = static_cast<size_t>(dfa.maxtagver) + 1;
        bool *been = new bool[nstates];
        bool *owrt = new bool[nver];
 
        for (size_t i = 0; i < nstates; ++i) {
                dfa_state_t *s = dfa.states[i];
-
                if (!s->fallback) continue;
+               tcmd_t *f = &s->tcmd[nsym], *b = f + 1;
+
+               // 'save' commands are the same as for final transition
+               for (tagsave_t *p = f->save; p; p = p->next) {
+                       b->save = pool.make_save(b->save, p->ver);
+               }
 
+               // 'copy' commands are split
                std::fill(been, been + nstates, false);
                std::fill(owrt, owrt + nver, false);
                find_overwritten_tags(dfa, i, been, owrt);
+               for (tagcopy_t *p = f->copy; p; p = p->next) {
 
-               const tagver_t *fin = dfa.rules[s->rule].tags;
-               for (const tagsave_t *p = s->tcmd[nsym].save; p; p = p->next) {
-                       owrt[p->ver] = false;
-               }
-
-               for (size_t t = 0; t < ntags; ++t) {
-                       const tagver_t
-                               f = fin[t],
-                               b = static_cast<tagver_t>(nver + t);
-
-                       if (f == TAGVER_ZERO || !owrt[f]) continue;
+                       // non-overwritten tags need 'copy' on fallback transition
+                       if (!owrt[p->rhs]) {
+                               b->copy = pool.make_copy(b->copy, p->lhs, p->rhs);
+                               continue;
+                       }
 
-                       // patch commands (backups must go first)
-                       tagcopy_t **p = &s->tcmd[nsym].copy;
-                       *p = pool.make_copy(*p, f, b);
+                       // overwritten tags need 'copy' on all outgoing non-accepting paths
+                       // ('copy' commands must go first, before potential overwrites)
                        for (size_t c = 0; c < nsym; ++c) {
                                size_t j = s->arcs[c];
                                if (j != dfa_t::NIL && dfa.states[j]->fallthru) {
-                                       p = &s->tcmd[c].copy;
-                                       *p = pool.make_copy(*p, b, f);
+                                       tagcopy_t *&q = s->tcmd[c].copy;
+                                       q = pool.make_copy(q, p->lhs, p->rhs);
                                }
                        }
-                       maxver = std::max(maxver, b);
                }
        }
 
        delete[] been;
        delete[] owrt;
-
-       dfa.maxtagver = maxver;
 }
 
 } // namespace re2c
index 263509c658fac636bc315620da166c13957d3130..e6a941dd260adf87ff7ed418d60b43c4c51a12d6 100644 (file)
@@ -43,6 +43,8 @@ bool tagcopy_t::equal(const tagcopy_t &x, const tagcopy_t &y)
 }
 
 tcmd_t::tcmd_t(): save(NULL), copy(NULL) {}
+tcmd_t::tcmd_t(tagsave_t *s, tagcopy_t *c): save(s), copy(c) {}
+bool tcmd_t::empty() const { return save == NULL && copy == NULL; }
 
 tccmd_t::tccmd_t(const tagsave_t *s, const tagcopy_t *c): save(s), copy(c) {}
 
@@ -83,6 +85,23 @@ tagsave_t *tcpool_t::conv_to_save(const tagver_t *vers, size_t ntag)
        return s;
 }
 
+tcmd_t tcpool_t::conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag)
+{
+       tagsave_t *s = NULL;
+       tagcopy_t *c = NULL;
+       for (size_t t = ntag; t-- > 0;) {
+               const tagver_t f = fins[t];
+               if (f == TAGVER_ZERO) continue;
+
+               if (vers[t] != TAGVER_ZERO) {
+                       s = make_save(s, f);
+               } else {
+                       c = make_copy(c, f, f + ntag);
+               }
+       }
+       return tcmd_t(s, c);
+}
+
 uint32_t hash_tcmd(const tagsave_t *save, const tagcopy_t *copy)
 {
        uint32_t h = 0;
index a20575ca5562db58d99e3eec8c3c88243a6853c7..0d50c2b7907ff79ad8e1dab428c127bb5c7a6af1 100644 (file)
@@ -39,6 +39,8 @@ struct tcmd_t
        tagcopy_t *copy;
 
        tcmd_t();
+       tcmd_t(tagsave_t *s, tagcopy_t *c);
+       bool empty() const;
 };
 
 struct tccmd_t
@@ -67,6 +69,7 @@ public:
        tagsave_t *make_save(tagsave_t *next, tagver_t ver);
        tagcopy_t *make_copy(tagcopy_t *next, tagver_t lhs, tagver_t rhs);
        tagsave_t *conv_to_save(const tagver_t *vers, size_t ntag);
+       tcmd_t conv_to_tcmd(const tagver_t *vers, const tagver_t *fins, size_t ntag);
 
        tcid_t insert(const tagsave_t *save, const tagcopy_t *copy);
        const tccmd_t &operator[](tcid_t id) const;
index 6ec1a8e7bbfacafbef3686ba9ebacb8cdd1e1ed3..b018ddf24e5990315804dabe23b502e231c21d4f 100644 (file)
@@ -36,7 +36,7 @@ yy5:
                yyt2 = YYCURSOR;
                goto yy6;
        case 'd':
-               yyt2 = YYCURSOR;
+               yyt1 = YYCURSOR;
                goto yy7;
        default:        goto yy4;
        }
@@ -59,12 +59,12 @@ yy7:
        switch (yych) {
        case 'a':       goto yy11;
        default:
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy8;
        }
 yy8:
-       s = yyt1;
-       r = yyt2;
+       s = yyt2;
+       r = yyt1;
        { r s }
 yy9:
        ++YYCURSOR;
@@ -82,7 +82,7 @@ yy10:
                yyt1 = YYCURSOR;
                goto yy2;
        } else {
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy8;
        }
 yy11:
@@ -91,7 +91,7 @@ yy11:
        yych = *YYCURSOR;
        switch (yych) {
        case 'd':
-               yyt2 = YYCURSOR;
+               yyt1 = YYCURSOR;
                goto yy7;
        default:        goto yy10;
        }
index f09c2fcc70903a0d66b13f06a86fe26852685718..3654bbd668727336adf3999b84486364549ec01c 100644 (file)
@@ -23,7 +23,7 @@ yy4:
        yych = *(YYMARKER = ++YYCURSOR);
        switch (yych) {
        case 'b':
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy5;
        default:        goto yy3;
        }
@@ -40,7 +40,6 @@ yy6:
        if (yyaccept == 0) {
                goto yy3;
        } else {
-               yyt1 = yyt2;
                goto yy8;
        }
 yy7:
@@ -48,10 +47,9 @@ yy7:
        YYMARKER = ++YYCURSOR;
        if (YYLIMIT <= YYCURSOR) YYFILL(1);
        yych = *YYCURSOR;
+       yyt1 = yyt2;
        switch (yych) {
-       case 'a':
-               yyt2 = yyt1;
-               goto yy9;
+       case 'a':       goto yy9;
        default:        goto yy8;
        }
 yy8:
@@ -63,7 +61,7 @@ yy9:
        yych = *YYCURSOR;
        switch (yych) {
        case 'b':
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy5;
        default:        goto yy6;
        }
index ad9b7209083b81db95166fa4235ad46b67f46e1f..8149e16ae789727d5ac5e8571d1da90cd605de35 100644 (file)
@@ -13,12 +13,12 @@ yyt2
        yych = *YYCURSOR;
        switch (yych) {
        case 'a':
-               yyt2 = YYCURSOR;
+               yyt1 = YYCURSOR;
                goto yy5;
        default:        goto yy3;
        }
 yy2:
-       YYCURSOR = yyt2;
+       YYCURSOR = yyt1;
        {}
 yy3:
        ++YYCURSOR;
@@ -28,7 +28,7 @@ yy5:
        yych = *++YYCURSOR;
        switch (yych) {
        case 'b':
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy6;
        default:        goto yy4;
        }
@@ -42,10 +42,9 @@ yy7:
        YYMARKER = ++YYCURSOR;
        if (YYLIMIT <= YYCURSOR) YYFILL(1);
        yych = *YYCURSOR;
+       yyt1 = yyt2;
        switch (yych) {
-       case 'a':
-               yyt2 = yyt1;
-               goto yy9;
+       case 'a':       goto yy9;
        default:        goto yy8;
        }
 yy8:
@@ -57,13 +56,12 @@ yy9:
        yych = *YYCURSOR;
        switch (yych) {
        case 'b':
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy11;
        default:        goto yy10;
        }
 yy10:
        YYCURSOR = YYMARKER;
-       yyt1 = yyt2;
        goto yy8;
 yy11:
        ++YYCURSOR;
index d77e65cc40cba1d0147b491a991dab7b284b1fcb..018b85f0addee11d24e0774ca9f2532783a6bb29 100644 (file)
@@ -14,13 +14,10 @@ yy1:
 yy0:
        if (YYLIMIT <= YYCURSOR) YYFILL(1);
        yych = *(YYMARKER = YYCURSOR);
+       yyt1 = yyt2;
        switch (yych) {
-       case 'a':
-               yyt2 = yyt1;
-               goto yy3;
-       case 'd':
-               yyt2 = yyt1;
-               goto yy5;
+       case 'a':       goto yy3;
+       case 'd':       goto yy5;
        default:        goto yy2;
        }
 yy2:
@@ -32,13 +29,12 @@ yy3:
        yych = *YYCURSOR;
        switch (yych) {
        case 'b':
-               yyt1 = YYCURSOR;
+               yyt2 = YYCURSOR;
                goto yy6;
        default:        goto yy4;
        }
 yy4:
        YYCURSOR = YYMARKER;
-       yyt1 = yyt2;
        goto yy2;
 yy5:
        ++YYCURSOR;
index 5e934d2559fa3cebba931c5ee8453eee81012a50..2c4dc6546261a50c1485d9902130fc7cfd89b807 100644 (file)
@@ -17,7 +17,7 @@ yyt3
        yych = *YYCURSOR;
        switch (yych) {
        case 'a':
-               yyt1 = YYCURSOR;
+               yyt3 = YYCURSOR;
                goto yy5;
        case 'c':       goto yy6;
        case 'e':       goto yy7;
@@ -54,14 +54,16 @@ yy9:
        yych = *YYCURSOR;
        switch (yych) {
        case 'a':
-               yyt3 = yyt1;
-               yyt1 = YYCURSOR;
+               yyt1 = yyt3;
+               yyt3 = YYCURSOR;
                goto yy10;
        case 'c':
-               yyt3 = yyt1;
+               yyt1 = yyt3;
                goto yy12;
        case 'e':       goto yy7;
-       default:        goto yy2;
+       default:
+               yyt1 = yyt3;
+               goto yy2;
        }
 yy10:
        ++YYCURSOR;
@@ -73,7 +75,6 @@ yy10:
        }
 yy11:
        YYCURSOR = YYMARKER;
-       yyt1 = yyt3;
        goto yy2;
 yy12:
        ++YYCURSOR;