]> granicus.if.org Git - re2c/commitdiff
Switched POSIX disambiguation algorithm from Kuklewicz to Okui. okui-tnfa
authorUlya Trofimovich <skvadrik@gmail.com>
Thu, 24 May 2018 23:05:37 +0000 (00:05 +0100)
committerUlya Trofimovich <skvadrik@gmail.com>
Thu, 24 May 2018 23:05:37 +0000 (00:05 +0100)
16 files changed:
re2c/src/adfa/dump.cc
re2c/src/dfa/closure.cc
re2c/src/dfa/closure.h
re2c/src/dfa/determinization.cc
re2c/src/dfa/dump.cc
re2c/src/dfa/dump.h
re2c/src/dfa/find_state.cc
re2c/src/dfa/find_state.h
re2c/src/dfa/tagpool.cc
re2c/src/dfa/tagpool.h
re2c/src/dfa/tagtree.cc
re2c/src/dfa/tagtree.h
re2c/src/nfa/dump.cc
re2c/src/re/ast_to_re.cc
re2c/src/re/default_tags.cc
re2c/src/re/tag.h

index fd7c70e4873f7ec9644c60794f6b3e8a8eb05706..4aed4178a88536b9ffb0efc7ba9fc77905e83f95 100644 (file)
@@ -30,8 +30,8 @@ void dump_adfa(const DFA &dfa)
        fprintf(stderr,
                "digraph DFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=Mrecord fontname=fixed]\n"
-               "  edge[arrowhead=vee fontname=fixed]\n\n");
+               "  node[shape=Mrecord fontname=Courier]\n"
+               "  edge[arrowhead=vee fontname=Courier]\n\n");
 
        fprintf(stderr,
                "  n [shape=point]"
index a097aafe9ef05c0f1068e30831a1bd842f40031e..5bdaedefd34059509fa5ea2d291b7e39c04db69e 100644 (file)
 namespace re2c
 {
 
-static void closure_posix(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags);
+static void closure_posix(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags, const bmatrix_t *bmatrix, size_t noldclos);
 static void closure_leftmost(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool);
-static int32_t compare_posix(const clos_t &c1, const clos_t &c2, Tagpool &tagpool, const std::vector<Tag> &tags);
 static void prune(closure_t &clos, std::valarray<Rule> &rules);
 static void lower_lookahead_to_transition(closure_t &clos);
 static tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers);
-static void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags);
+static void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags,
+       const bmatrix_t *bmatrix, bmatrix_t *&bmatrix1, size_t noldclos);
 static bool cmpby_rule_state(const clos_t &x, const clos_t &y);
 
 tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
-       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow)
+       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow,
+       const bmatrix_t *bmatrix, bmatrix_t *&bmatrix1, size_t noldclos)
 {
        // build tagged epsilon-closure of the given set of NFA states
        if (tagpool.opts->posix_captures) {
-               closure_posix(clos1, clos2, shadow, tagpool, dfa.tags);
+               closure_posix(clos1, clos2, shadow, tagpool, dfa.tags, bmatrix, noldclos);
                prune(clos2, dfa.rules);
-               orders(clos2, tagpool, dfa.tags);
                std::sort(clos2.begin(), clos2.end(), cmpby_rule_state);
+               orders(clos2, tagpool, dfa.tags, bmatrix, bmatrix1, noldclos);
        } else {
                closure_leftmost(clos1, clos2, shadow, tagpool);
                prune(clos2, dfa.rules);
@@ -67,18 +68,6 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y)
        return false;
 }
 
-// Skip non-orbit start tags: their position is fixed on some higher-priority
-// tag (except the very first tag, but in RE2C match is always anchored).
-// We cannot skip orbit start tag because the corresponding orbit end tag is
-// hoisted out of loop (by construction) and is, in fact, non-orbit; but we can
-// skip orbit end tag instead.
-// Skipping non-orbit start tags allows us to compare all subhistories in the
-// same way (incrementally). Subhistories of non-orbit start tags cannot be
-// compared incrementally, because default value may be added on a later step
-// than non-default value.
-static bool redundant(size_t t, const std::vector<Tag> &tags) {
-       return (t % 2 == 0) != orbit(tags[t]);
-}
 
 /* note [epsilon-closures in tagged NFA]
  *
@@ -98,7 +87,8 @@ static bool redundant(size_t t, const std::vector<Tag> &tags) {
  */
 
 static void enqueue(clos_t x, std::stack<nfa_state_t*> &bstack, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
+       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
+       const bmatrix_t *bmatrix, size_t noldclos)
 {
        nfa_state_t *n = x.state;
        uint32_t &i = n->clos;
@@ -107,10 +97,12 @@ static void enqueue(clos_t x, std::stack<nfa_state_t*> &bstack, closure_t &done,
                i = static_cast<uint32_t>(done.size());
                done.push_back(x);
        } else {
-               const int32_t cmp = compare_posix(x, done[i], tagpool, tags);
-               if (cmp < 0) std::swap(x, done[i]);
-               if (shadow && cmp != 0) shadow->push_back(x);
-               if (cmp >= 0) return;
+               clos_t &y = done[i];
+               int h1, h2, l;
+               l = tagpool.history.precedence (x, y, h1, h2, bmatrix, tags, noldclos);
+               if (l < 0) std::swap(x, y);
+               if (shadow && l != 0) shadow->push_back(x);
+               if (l >= 0) return;
        }
 
        if (n->status != GOR_TOPSORT) {
@@ -120,26 +112,27 @@ static void enqueue(clos_t x, std::stack<nfa_state_t*> &bstack, closure_t &done,
 }
 
 static void scan(nfa_state_t *n, std::stack<nfa_state_t*> &bstack, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
+       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
+       const bmatrix_t *bmatrix, size_t noldclos)
 {
        tagtree_t &history = tagpool.history;
        clos_t x = done[n->clos];
        switch (n->type) {
                case nfa_state_t::NIL:
                        x.state = n->nil.out;
-                       enqueue(x, bstack, done, shadow, tagpool, tags);
+                       enqueue(x, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        break;
                case nfa_state_t::ALT:
                        x.state = n->alt.out2;
-                       enqueue(x, bstack, done, shadow, tagpool, tags);
+                       enqueue(x, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        x.state = n->alt.out1;
-                       enqueue(x, bstack, done, shadow, tagpool, tags);
+                       enqueue(x, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        break;
                case nfa_state_t::TAG:
                        x.state = n->tag.out;
                        x.tlook = history.push(x.tlook, n->tag.info,
                                n->tag.bottom ? TAGVER_BOTTOM : TAGVER_CURSOR);
-                       enqueue(x, bstack, done, shadow, tagpool, tags);
+                       enqueue(x, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        break;
                case nfa_state_t::RAN:
                case nfa_state_t::FIN:
@@ -148,7 +141,8 @@ static void scan(nfa_state_t *n, std::stack<nfa_state_t*> &bstack, closure_t &do
 }
 
 void closure_posix(const closure_t &init, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags)
+       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
+       const bmatrix_t *bmatrix, size_t noldclos)
 {
        std::stack<nfa_state_t*>
                &astack = tagpool.astack,
@@ -158,7 +152,7 @@ void closure_posix(const closure_t &init, closure_t &done,
        done.clear();
        if (shadow) shadow->clear();
        for (cclositer_t c = init.begin(); c != init.end(); ++c) {
-               enqueue(*c, bstack, done, shadow, tagpool, tags);
+               enqueue(*c, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
        }
 
        // Gordberg-Radzik 'shortest path' algorithm.
@@ -176,7 +170,7 @@ void closure_posix(const closure_t &init, closure_t &done,
                        nfa_state_t *n = bstack.top();
                        if (n->status == GOR_NEWPASS) {
                                n->status = GOR_TOPSORT;
-                               scan(n, bstack, done, shadow, tagpool, tags);
+                               scan(n, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        } else if (n->status == GOR_TOPSORT) {
                                bstack.pop();
                                astack.push(n);
@@ -190,7 +184,7 @@ void closure_posix(const closure_t &init, closure_t &done,
                for (; !astack.empty(); ) {
                        nfa_state_t *n = astack.top();
                        astack.pop();
-                       scan(n, bstack, done, shadow, tagpool, tags);
+                       scan(n, bstack, done, shadow, tagpool, tags, bmatrix, noldclos);
                        n->status = GOR_OFFSTACK;
                }
        }
@@ -221,25 +215,6 @@ void closure_posix(const closure_t &init, closure_t &done,
  * with the highest priority (see note [closure items are sorted by rule]).
  */
 
-int32_t compare_posix(const clos_t &c1, const clos_t &c2,
-       Tagpool &tagpool, const std::vector<Tag> &tags)
-{
-       if (tagpool.ntags == 0
-               || (c1.order == c2.order && c1.tlook == c2.tlook)) return 0;
-
-       tagtree_t &h = tagpool.history;
-       for (size_t t = 0; t < tagpool.ntags; ++t) {
-               if (redundant(t, tags)) continue;
-               const hidx_t i1 = c1.tlook, i2 = c2.tlook;
-               const tagver_t
-                       o1 = tagpool[c1.order][t],
-                       o2 = tagpool[c2.order][t];
-               const int32_t cmp = h.compare_histories(i1, i2, o1, o2, t);
-               if (cmp != 0) return cmp;
-       }
-       return 0;
-}
-
 void closure_leftmost(const closure_t &init, closure_t &done,
        closure_t *shadow, Tagpool &tagpool)
 {
@@ -428,94 +403,27 @@ tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers
        return cmd;
 }
 
-/* note [POSIX orbit tags]
- *
- * POSIX disambiguation rules demand that earlier subexpressions match
- * the longest possible prefix of the input string (without violating the
- * whole match). To accommodate these rules, we resolve conflicts on orbit
- * tags by comparison of tag subhistories on conflicting NFA paths.
- *
- * If one subhistory is a proper prefix of another subhistory, it is less;
- * otherwise for the first pair of different offsets, if one offset is greater
- * than the other, then the corresponding subhistory is less.
- *
- * It is possible to pre-compare two NFA paths corresponding to the same
- * input string prefix and ending in the same NFA state; if paths are not
- * equal, the result of this comparison will hold for any common suffix.
- *
- * It is also possible to pre-compare NFA paths that correspond to the same
- * input prefix, but end in different NFA states. Such comparison is incorrect
- * unless subhistories start at the same offset; but if it is incorrect, we
- * will never use its result (tags with higher priority will also disagree).
- *
- * Therefore instead of keeping the whole history of offsets we calculate
- * the relative order of any pair of subhistories on each step.
- *
- * This part of the algorithm was invented by Christopher Kuklewicz.
- */
-
-struct cmp_posix_t
+static inline int32_t pack(int32_t longest, int32_t leftmost)
 {
-       Tagpool &tagpool;
-       size_t tag;
-       bool operator()(cclositer_t x, cclositer_t y)
-       {
-               const hidx_t i1 = x->tlook, i2 = y->tlook;
-               const tagver_t
-                       o1 = tagpool[x->order][tag],
-                       o2 = tagpool[y->order][tag];
-               // comparison result is inverted, because orders are used as offsets
-               return tagpool.history.compare_last_subhistories(i1, i2, o1, o2, tag) > 0;
-       }
-};
+       // leftmost: higher 2 bits, longest: lower 30 bits
+       return longest | (leftmost << 30);
+}
 
-void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags)
+void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags,
+       const bmatrix_t *bmatrix, bmatrix_t *&bmatrix1, size_t noldclos)
 {
-       clositer_t b = clos.begin(), e = clos.end(), c;
-       const size_t
-               ntag = tagpool.ntags,
-               nclos = clos.size();
-       size_t &maxclos = tagpool.maxclos;
-       tagver_t *&os = tagpool.orders, *o, *os0;
-       cclositer_t *&ps = tagpool.closes, *pe, *p;
-
-       if (ntag == 0) return;
-
-       // reallocate buffers if necessary
-       if (maxclos < nclos) {
-               maxclos = nclos * 2; // in advance
-               delete[] os;
-               delete[] ps;
-               os = new tagver_t[(ntag + 1) * maxclos];
-               ps = new cclositer_t[maxclos];
-       }
-
-       os0 = os + ntag * maxclos;
-       pe = ps;
-       for (c = b; c != e; ++c) *pe++ = c;
-
-       memset(os, 0, ntag * nclos * sizeof(tagver_t)); //some tags are skipped
-       for (size_t t = 0; t < ntag; ++t) {
-               if (redundant(t, tags)) continue;
-
-               cmp_posix_t cmp = {tagpool, t};
-               std::sort(ps, pe, cmp);
-               tagver_t m = 0;
-               o = os0;
-               for (p = ps; p < pe; ++m) {
-                       *o++ = m;
-                       for (; ++p < pe && !cmp(p[-1], p[0]);) *o++ = m;
-               }
-
-               o = os;
-               for (c = b; c != e; ++c, o += ntag) {
-                       o[t] = os0[std::find(ps, pe, c) - ps];
+       const size_t nclos = clos.size();
+       bmatrix1 = tagpool.alc.alloct<bmatrix_t>(nclos * nclos);
+
+       for (size_t i = 0; i < nclos; ++i) {
+               for (size_t j = i + 1; j < nclos; ++j) {
+                       int rho1, rho2, l;
+                       l = tagpool.history.precedence (clos[i], clos[j], rho1, rho2, bmatrix, tags, noldclos);
+                       bmatrix1[i * nclos + j] = pack(rho1, l);
+                       bmatrix1[j * nclos + i] = pack(rho2, -l);
                }
-       }
-
-       o = os;
-       for (c = b; c != e; ++c, o += ntag) {
-               c->order = tagpool.insert(o);
+               bmatrix1[i * nclos + i] = 0;
+               clos[i].index = i;
        }
 }
 
index ed30ca359ad0be1275d20547ad8a3a897e273e83..a5e1cc543189fc382045c3737b1950c5ea7887a1 100644 (file)
@@ -21,7 +21,7 @@ struct clos_t
 {
        nfa_state_t *origin; // for debug only
        nfa_state_t *state;
-       size_t order; // vector of orders
+       size_t index;
        size_t tvers; // vector of tag versions (including lookahead tags)
        hidx_t ttran; // history of transition tags
        hidx_t tlook; // history of lookahead tags
@@ -36,6 +36,8 @@ typedef closure_t::const_iterator cclositer_t;
 typedef closure_t::reverse_iterator rclositer_t;
 typedef closure_t::const_reverse_iterator rcclositer_t;
 
+typedef int bmatrix_t;
+
 struct newver_t
 {
        size_t tag;
@@ -54,14 +56,15 @@ struct newver_cmp_t
                if (x.base < y.base) return true;
                if (x.base > y.base) return false;
 
-               return history.compare_plain(x.history, y.history, x.tag) < 0;
+               return history.compare_reversed(x.history, y.history, x.tag) < 0;
        }
 };
 
 typedef std::map<newver_t, tagver_t, newver_cmp_t> newvers_t;
 
 tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
-       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow);
+       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow,
+       const bmatrix_t *bmatrix, bmatrix_t *&bmatrix1, size_t noldclos);
 
 } // namespace re2c
 
index f2f8347ae8cc77dbfdc730d6b10e143c60bff100..bca40743c7847807b180a05b4c8278984c85b076 100644 (file)
@@ -53,8 +53,7 @@ void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
                nfa_state_t *s1 = kernel->state[i],
                        *s2 = transition(s1, symbol);
                if (s2) {
-                       clos_t c = {s1, s2, kernel->order[i], kernel->tvers[i],
-                               kernel->tlook[i], HROOT};
+                       clos_t c = {s1, s2, i, kernel->tvers[i], kernel->tlook[i], HROOT};
                        clos.push_back(c);
                }
        }
@@ -82,6 +81,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        newvers_t newvers(newvers_cmp);
        tcmd_t *acts;
        dump_dfa_t dump(*this, tagpool, nfa);
+       bmatrix_t *bmatrix = NULL;
 
        // all-zero tag configuration must have static number zero
        assert(ZERO_TAGS == tagpool.insert_const(TAGVER_ZERO));
@@ -111,15 +111,16 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
 
        clos_t c0 = {NULL, nfa.root, ZERO_TAGS, INITIAL_TAGS, HROOT, HROOT};
        clos1.push_back(c0);
-       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow);
-       find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump);
+       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, NULL, bmatrix, 0);
+       find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump, bmatrix);
 
        for (size_t i = 0; i < kernels.size(); ++i) {
                newvers.clear();
                for (size_t c = 0; c < nchars; ++c) {
-                       reach(kernels[i], clos1, charset[c]);
-                       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow);
-                       find_state(*this, i, c, kernels, clos2, acts, dump);
+                       const kernel_t *kernel = kernels[i];
+                       reach(kernel, clos1, charset[c]);
+                       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, kernel->bmatrix, bmatrix, kernel->size);
+                       find_state(*this, i, c, kernels, clos2, acts, dump, bmatrix);
                }
        }
 
index 7f8d505e6998c5fc3de1ce4a4c26c980034e28f8..6304d1f3b80680d1b53810c099535a961974018a 100644 (file)
@@ -34,8 +34,8 @@ dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n)
        shadow = new closure_t;
        fprintf(stderr, "digraph DFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=plaintext fontname=fixed]\n"
-               "  edge[arrowhead=vee fontname=fixed]\n\n");
+               "  node[fontname=Courier shape=plaintext]\n"
+               "  edge[fontname=Courier arrowhead=vee]\n\n");
 }
 
 dump_dfa_t::~dump_dfa_t()
@@ -71,7 +71,7 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
        fprintf(stderr, " ");
 }
 
-void dump_dfa_t::closure_tags(cclositer_t c)
+void dump_dfa_t::closure_tags(cclositer_t c, bool shadowed)
 {
        if (!debug) return;
        if (c->tvers == ZERO_TAGS) return;
@@ -83,9 +83,14 @@ void dump_dfa_t::closure_tags(cclositer_t c)
 
        for (size_t t = 0; t < ntag; ++t) {
                fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
-//             if (tagpool.opts->posix_captures) {
-//                     fprintf(stderr, "[%d]", ords[t]);
-//             }
+               if (tagpool.opts->posix_captures && (t == 0)) {
+//             if (tagpool.opts->posix_captures && (t % 2 == 1 || orbit(dfa.tags[t]))) {
+                       if (shadowed) {
+//                             fprintf(stderr, "[ ]");
+                       } else {
+//                             fprintf(stderr, "[%d]", ords[t]);
+                       }
+               }
        }
 
        if (l != HROOT) {
@@ -93,7 +98,8 @@ void dump_dfa_t::closure_tags(cclositer_t c)
        }
 }
 
-void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
+void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew,
+       const bmatrix_t *bmatrix)
 {
        if (!debug) return;
 
@@ -103,7 +109,8 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
                *style = isnew ? "" : " STYLE=\"dotted\"",
                *color = " COLOR=\"lightgray\"";
 
-       fprintf(stderr, "  %s%u [label=<<TABLE"
+//     fprintf(stderr, "  %s%u [label=<<TABLE"
+       fprintf(stderr, "  %s%u [label=<<TABLE><TR><TD><TABLE"
                " BORDER=\"0\""
                " CELLBORDER=\"1\""
                ">", isnew ? "" : "i", state);
@@ -111,7 +118,7 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
        for (s = s1; s != s2; ++s) {
                fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"_%u_%d\"%s%s><FONT%s>%u",
                        index(s->state), (int)(s - s1), color, style, color, index(s->state));
-               closure_tags(s);
+               closure_tags(s, true);
                fprintf(stderr, "</FONT></TD></TR>");
        }
        if (!shadow->empty()) {
@@ -120,17 +127,25 @@ void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
        for (c = c1; c != c2; ++c) {
                fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
                        index(c->state), style, index(c->state));
-               closure_tags(c);
+               closure_tags(c, false);
                fprintf(stderr, "</TD></TR>");
        }
+       fprintf(stderr, "</TABLE></TD></TR>\n");
+
+//     for (bmatrix_t::const_iterator i = bmatrix.begin(); i != bmatrix.end(); ++i) {
+//             fprintf(stderr, "<TR><TD>\n");
+//             fprintf(stderr, "(%u,%u) %d - %d\n", index(i->first.first), index(i->first.second), i->second, dmatrix.find(i->first)->second);
+//             fprintf(stderr, "</TD></TR>\n");
+//     }
+
        fprintf(stderr, "</TABLE>>]\n");
 }
 
-void dump_dfa_t::state0(const closure_t &clos)
+void dump_dfa_t::state0(const closure_t &clos, const bmatrix_t *bmatrix)
 {
        if (!debug) return;
 
-       closure(clos, 0, true);
+       closure(clos, 0, true, bmatrix);
        fprintf(stderr, "  void [shape=point]\n");
        for (cclositer_t c = shadow->begin(); c != shadow->end(); ++c) {
                fprintf(stderr, "  void -> 0:_%u_%d:w [style=dotted color=lightgray fontcolor=lightgray label=\"",
@@ -145,7 +160,8 @@ void dump_dfa_t::state0(const closure_t &clos)
        }
 }
 
-void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew)
+void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew,
+       const bmatrix_t *bmatrix)
 {
        if (!debug) return;
 
@@ -162,7 +178,7 @@ void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool
                z = isnew ? y : ++uniqidx;
        const char *prefix = isnew ? "" : "i";
 
-       closure(clos, z, isnew);
+       closure(clos, z, isnew, bmatrix);
        if (!isnew) {
                fprintf(stderr, "  i%u [style=dotted]\n"
                        "  i%u:s -> %u:s [style=dotted label=\"", z, z, y);
@@ -213,8 +229,8 @@ void dump_dfa(const dfa_t &dfa)
        fprintf(stderr,
                "digraph DFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=Mrecord fontname=fixed]\n"
-               "  edge[arrowhead=vee fontname=fixed]\n\n");
+               "  node[shape=Mrecord fontname=Courier]\n"
+               "  edge[arrowhead=vee fontname=Courier]\n\n");
 
        // initializer
        fprintf(stderr,
@@ -317,7 +333,7 @@ void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers)
                        if (h.tag(t) != i) continue;
                        if (h.elem(t) < TAGVER_ZERO) {
                                fprintf(stderr, "&darr;");
-                       } else if (t > TAGVER_ZERO) {
+                       } else if (h.elem(t) > TAGVER_ZERO) {
                                fprintf(stderr, "&uarr;");
                        }
                }
index c58e281059367f4bc06e797fdc53b9acefc4143d..26b2532d6595035ad0373e30c1bc968af93bf4ba 100644 (file)
@@ -28,10 +28,10 @@ struct dump_dfa_t
 
        dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n);
        ~dump_dfa_t();
-       void closure_tags(cclositer_t c);
-       void closure(const closure_t &clos, uint32_t state, bool isnew);
-       void state0(const closure_t &clos);
-       void state(const closure_t &clos, size_t state, size_t symbol, bool isnew);
+       void closure_tags(cclositer_t c, bool shadowed);
+       void closure(const closure_t &clos, uint32_t state, bool isnew, const bmatrix_t *bmatrix);
+       void state0(const closure_t &clos, const bmatrix_t *bmatrix);
+       void state(const closure_t &clos, size_t state, size_t symbol, bool isnew, const bmatrix_t *bmatrix);
        void final(size_t state, const nfa_state_t *port);
        uint32_t index(const nfa_state_t *s) const;
        FORBID_COPY(dump_dfa_t);
index fe044bca4a12c07d716a2a6b60784cf0c3fbfc3e..0fc1adf559e6012fc75e8014864a9c587edaaeef 100644 (file)
 namespace re2c
 {
 
-kernel_t::kernel_t(size_t n)
-       : size(n)
-       , state(new nfa_state_t*[size])
-       , tvers(new size_t[size])
-       , tlook(new hidx_t[size])
-       , order(new size_t[size])
-{}
-
-kernel_t *kernel_t::copy(const kernel_t &k)
+kernel_t *kernel_t::make_init(size_t size, Tagpool &tagpool)
 {
-       const size_t n = k.size;
-       kernel_t *kcopy = new kernel_t(n);
-       memcpy(kcopy->state, k.state, n * sizeof(void*));
-       memcpy(kcopy->tvers, k.tvers, n * sizeof(size_t));
-       memcpy(kcopy->tlook, k.tlook, n * sizeof(hidx_t));
-       memcpy(kcopy->order, k.order, n * sizeof(size_t));
+       kernel_t *kcopy = tagpool.alc.alloct<kernel_t>(1);
+       kcopy->size = size;
+       kcopy->bmatrix = NULL;
+       kcopy->state = tagpool.alc.alloct<nfa_state_t*>(size);
+       kcopy->tvers = tagpool.alc.alloct<size_t>(size);
+       kcopy->tlook = tagpool.alc.alloct<hidx_t>(size);
        return kcopy;
 }
 
-kernel_t::~kernel_t()
+kernel_t *kernel_t::make_copy(const kernel_t &k, Tagpool &tagpool)
 {
-       delete[] state;
-       delete[] tvers;
-       delete[] tlook;
-       delete[] order;
+       kernel_t *kcopy = tagpool.alc.alloct<kernel_t>(1);
+
+       const size_t size = k.size;
+       kcopy->size = size;
+
+       bmatrix_t *bmatrix = NULL;
+       if (k.bmatrix) {
+               bmatrix = tagpool.alc.alloct<bmatrix_t>(size * size);
+               memcpy(bmatrix, k.bmatrix, size * size * sizeof(bmatrix_t));
+       }
+       kcopy->bmatrix = bmatrix;
+
+       kcopy->state = tagpool.alc.alloct<nfa_state_t*>(size);
+       memcpy(kcopy->state, k.state, size * sizeof(void*));
+
+       kcopy->tvers = tagpool.alc.alloct<size_t>(size);
+       memcpy(kcopy->tvers, k.tvers, size * sizeof(size_t));
+
+       kcopy->tlook = tagpool.alc.alloct<hidx_t>(size);
+       memcpy(kcopy->tlook, k.tlook, size * sizeof(hidx_t));
+
+       return kcopy;
 }
 
 static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
@@ -54,10 +64,10 @@ static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
                const hidx_t xl = x->tlook[i], yl = y->tlook[i];
                for (size_t t = 0; t < tagpool.ntags; ++t) {
                        if (history(tags[t])) {
-                               // compare subhistories
-                               if (h.compare_plain(xl, yl, t) != 0) return false;
+                               // compare full tag sequences
+                               if (h.compare_reversed(xl, yl, t) != 0) return false;
                        } else {
-                               // compare only the last tags
+                               // compare only the last pair of tags
                                if (h.last(xl, t) != h.last(yl, t)) return false;
                        }
                }
@@ -74,7 +84,7 @@ struct kernel_eq_t
                return x->size == y->size
                        && memcmp(x->state, y->state, x->size * sizeof(void*)) == 0
                        && memcmp(x->tvers, y->tvers, x->size * sizeof(size_t)) == 0
-                       && memcmp(x->order, y->order, x->size * sizeof(size_t)) == 0
+                       && (!x->bmatrix || memcmp(x->bmatrix, y->bmatrix, x->size * x->size * sizeof(bmatrix_t)) == 0)
                        && equal_lookahead_tags(x, y, tagpool, tags);
        }
 };
@@ -122,7 +132,7 @@ bool kernels_t::operator()(const kernel_t *k1, const kernel_t *k2)
        // check that kernel sizes, NFA states and orders coincide
        const bool compatible = k1->size == k2->size
                && memcmp(k1->state, k2->state, k1->size * sizeof(void*)) == 0
-               && memcmp(k1->order, k2->order, k1->size * sizeof(size_t)) == 0
+               && (! k1->bmatrix || memcmp(k1->bmatrix, k2->bmatrix, k1->size * k1->size * sizeof(bmatrix_t)) == 0)
                && equal_lookahead_tags(k1, k2, tagpool, tags);
        if (!compatible) return false;
 
@@ -215,7 +225,7 @@ kernels_t::kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector<Tag> &ts)
        , tags(ts)
 
        , maxsize(0) // usually ranges from one to some twenty
-       , buffer(new kernel_t(maxsize))
+       , buffer(kernel_t::make_init(maxsize, tagp))
 
        , cap(0)
        , max(0)
@@ -230,23 +240,11 @@ kernels_t::kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector<Tag> &ts)
        , actlhs(NULL)
 {}
 
-kernels_t::~kernels_t()
-{
-       delete buffer;
-       delete[] mem;
-
-       const size_t n = lookup.size();
-       for (size_t i = 0; i < n; ++i) {
-               delete lookup[i];
-       }
-}
-
 void kernels_t::init(tagver_t v, size_t nkern)
 {
        if (maxsize < nkern) {
                maxsize = nkern * 2; // in advance
-               delete buffer;
-               buffer = new kernel_t(maxsize);
+               buffer = kernel_t::make_init(maxsize, tagpool);
        }
 
        // +1 to ensure max tag version is not forgotten in loops
@@ -262,8 +260,7 @@ void kernels_t::init(tagver_t v, size_t nkern)
                        sz_actnext = n * sizeof(tcmd_t*),
                        sz_actlhs = n * sizeof(tagver_t),
                        sz_indeg = n * sizeof(uint32_t);
-               delete[] mem;
-               mem = new char[sz_x2y + sz_x2t + sz_actnext + sz_actlhs + sz_indeg];
+               mem = tagpool.alc.alloct<char>(sz_x2y + sz_x2t + sz_actnext + sz_actlhs + sz_indeg);
 
                // point to the center (zero index) of each buffer
                // indexes in range [-N .. N] must be valid, where N is capacity
@@ -307,7 +304,7 @@ const kernel_t *kernels_t::operator[](size_t idx) const
  */
 
 kernels_t::result_t kernels_t::insert(const closure_t &clos,
-       tcmd_t *acts, tagver_t maxver)
+       tcmd_t *acts, tagver_t maxver, const bmatrix_t *bmatrix)
 {
        const size_t nkern = clos.size();
        size_t x = dfa_t::NIL;
@@ -320,18 +317,20 @@ kernels_t::result_t kernels_t::insert(const closure_t &clos,
 
        // copy closure to buffer kernel and find its normal form
        buffer->size = nkern;
+       buffer->bmatrix = bmatrix;
        for (size_t i = 0; i < nkern; ++i) {
                const clos_t &c = clos[i];
                buffer->state[i] = c.state;
                buffer->tvers[i] = c.tvers;
                buffer->tlook[i] = c.tlook;
-               buffer->order[i] = c.order;
        }
 
        // get kernel hash
        uint32_t hash = static_cast<uint32_t>(nkern); // seed
        hash = hash32(hash, buffer->state, nkern * sizeof(void*));
-       hash = hash32(hash, buffer->order, nkern * sizeof(size_t));
+       if (bmatrix) {
+               hash = hash32(hash, buffer->bmatrix, nkern * nkern * sizeof(int));
+       }
 
        // try to find identical kernel
        kernel_eq_t eq = {tagpool, tags};
@@ -345,7 +344,7 @@ kernels_t::result_t kernels_t::insert(const closure_t &clos,
        if (x != index_t::NIL) return result_t(x, acts, false);
 
        // otherwise add new kernel
-       x = lookup.push(hash, kernel_t::copy(*buffer));
+       x = lookup.push(hash, kernel_t::make_copy(*buffer, tagpool));
        return result_t(x, acts, true);
 }
 
@@ -382,10 +381,10 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
 }
 
 void find_state(dfa_t &dfa, size_t state, size_t symbol, kernels_t &kernels,
-       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump)
+       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const bmatrix_t *bmatrix)
 {
        const kernels_t::result_t
-               result = kernels.insert(closure, acts, dfa.maxtagver);
+               result = kernels.insert(closure, acts, dfa.maxtagver, bmatrix);
 
        if (result.isnew) {
                // create new DFA state
@@ -406,12 +405,12 @@ void find_state(dfa_t &dfa, size_t state, size_t symbol, kernels_t &kernels,
 
        if (state == dfa_t::NIL) { // initial state
                dfa.tcmd0 = result.cmd;
-               dump.state0(closure);
+               dump.state0(closure, bmatrix);
        } else {
                dfa_state_t *s = dfa.states[state];
                s->arcs[symbol] = result.state;
                s->tcmd[symbol] = result.cmd;
-               dump.state(closure, state, symbol, result.isnew);
+               dump.state(closure, state, symbol, result.isnew, bmatrix);
        }
 }
 
index 0f0841256a668d7cb7a561f468458f4e8e9146a2..5e85d1dda1970fb5c85e1e81d61fbe42ec8af0b0 100644 (file)
@@ -25,14 +25,13 @@ struct tcmd_t;
 struct kernel_t
 {
        size_t size;
+       const bmatrix_t *bmatrix;
        nfa_state_t **state;
        size_t *tvers; // tag versions
        hidx_t *tlook; // lookahead tags
-       size_t *order; // see note [orbit order of closure items]
 
-       explicit kernel_t(size_t n);
-       ~kernel_t();
-       static kernel_t *copy(const kernel_t &k);
+       static kernel_t *make_init(size_t size, Tagpool &tagpool);
+       static kernel_t *make_copy(const kernel_t &k, Tagpool &tagpool);
        FORBID_COPY(kernel_t);
 };
 
@@ -78,17 +77,16 @@ private:
 
 public:
        kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector<Tag> &ts);
-       ~kernels_t();
        void init(tagver_t v, size_t nkern);
        size_t size() const;
        const kernel_t* operator[](size_t idx) const;
-       result_t insert(const closure_t &clos, tcmd_t *acts, tagver_t maxver);
+       result_t insert(const closure_t &clos, tcmd_t *acts, tagver_t maxver, const bmatrix_t *bmatrix);
        bool operator()(const kernel_t *k1, const kernel_t *k2);
        FORBID_COPY(kernels_t);
 };
 
 void find_state(dfa_t &dfa, size_t state, size_t symbol, kernels_t &kernels,
-       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump);
+       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const bmatrix_t *bmatrix);
 
 } // namespace re2c
 
index b4fd90af0b798806240c0a9947367d72be62711f..0cc0feac6a88a4a0e7eea4d77ef99c67b11736dd 100644 (file)
@@ -25,9 +25,7 @@ Tagpool::Tagpool(const opt_t *o, size_t n)
        , opts(o)
        , ntags(n)
        , buffer(new tagver_t[n])
-       , maxclos(0)
-       , orders(NULL)
-       , closes(NULL)
+       , alc()
        , history()
        , astack()
        , bstack()
@@ -41,8 +39,6 @@ Tagpool::~Tagpool()
        for (size_t i = 0; i < n; ++i) {
                free(const_cast<tagver_t*>(lookup[i]));
        }
-       delete[] orders;
-       delete[] closes;
 }
 
 size_t Tagpool::insert_const(tagver_t ver)
index f06de0afd73abe6809d883c40c27052c1e53a6bb..38ac840bd8a836b87170d23db87724e9a3783086 100644 (file)
@@ -9,6 +9,7 @@
 #include "src/re/tag.h"
 #include "src/util/forbid_copy.h"
 #include "src/util/lookup.h"
+#include "src/util/slab_allocator.h"
 
 namespace re2c
 {
@@ -29,9 +30,8 @@ public:
        const size_t ntags;
        tagver_t *buffer;
 
-       size_t maxclos;
-       tagver_t *orders;
-       cclositer_t *closes;
+       typedef slab_allocator_t<> alc_t;
+       alc_t alc;
 
        tagtree_t history;
        std::stack<nfa_state_t*> astack;
index a5966a18201d70e4d97b061793decddff45e266f..09e2e93b96dcd5ac464bef40b5f17f2c46b85dd9 100644 (file)
@@ -1,6 +1,7 @@
 #include <assert.h>
 #include <stdlib.h>
 
+#include "src/dfa/closure.h"
 #include "src/dfa/tagtree.h"
 
 namespace re2c
@@ -23,137 +24,116 @@ hidx_t tagtree_t::push(hidx_t i, size_t t, tagver_t v)
        return static_cast<hidx_t>(nodes.size() - 1);
 }
 
-// cut out subhistory of this tag (just skip all other tags)
-static void subhistory(const tagtree_t &history,
-       std::vector<tagver_t> &path, hidx_t idx, size_t tag)
+tagver_t tagtree_t::last(hidx_t i, size_t t) const
 {
-       path.clear();
-       for (hidx_t i = idx; i != HROOT; i = history.pred(i)) {
-               if (history.tag(i) == tag) {
-                       path.push_back(history.elem(i));
-               }
+       for (; i != HROOT; i = pred(i)) {
+               if (tag(i) == t) return elem(i);
        }
+       return TAGVER_ZERO;
 }
 
-static int32_t compare_reversed(
-       const std::vector<tagver_t> &h1,
-       const std::vector<tagver_t> &h2)
+int32_t tagtree_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const
 {
-       std::vector<tagver_t>::const_reverse_iterator
-               i1 = h1.rbegin(), e1 = h1.rend(),
-               i2 = h2.rbegin(), e2 = h2.rend();
-
+       // compare in reverse, from tail to head: direction makes
+       // no difference when comparing for exact coincidence
        for (;;) {
-               if (i1 == e1 && i2 == e2) break;
-               if (i1 == e1) return -1;
-               if (i2 == e2) return 1;
-               if (*i1 > *i2) return -1;
-               if (*i1 < *i2) return 1;
-               ++i1; ++i2;
+               for (; x != HROOT && tag(x) != t; x = pred(x));
+               for (; y != HROOT && tag(y) != t; y = pred(y));
+               if (x == HROOT && y == HROOT) return 0;
+               if (x == HROOT) return -1;
+               if (y == HROOT) return 1;
+               if (elem(x) > elem(y)) return -1;
+               if (elem(x) < elem(y)) return 1;
+               x = pred(x);
+               y = pred(y);
        }
-
-       return 0;
 }
 
-int32_t tagtree_t::compare_plain(hidx_t x, hidx_t y, size_t t)
+static int height(ssize_t xtag, const std::vector<Tag> &tags)
 {
-       subhistory(*this, path1, x, t);
-       subhistory(*this, path2, y, t);
-       return compare_reversed(path1, path2);
+       const size_t tag = static_cast<size_t>(abs(xtag));
+       return tags[tag].height + (tag % 2 == 0 ? 1 : 0);
 }
 
-static size_t boundary_tag(size_t tag)
-{
-       // for start tags, return itself; for end tags, return start tag
-       // (start tags have even numbers, end tags have odd numbers)
-       return tag & ~1u;
-}
-
-// returns all subhistories of the given tag as one list (individual
-// subhistories are separated by delimiter)
-static int32_t subhistory_list(const tagtree_t &history,
-       std::vector<tagver_t> &path, hidx_t idx, size_t tag)
+static void reconstruct_history(const tagtree_t &history, std::vector<ssize_t> &path, hidx_t idx)
 {
        path.clear();
-       int32_t nsub = 0;
-       hidx_t i = idx;
-
-       const size_t bound = boundary_tag(tag);
-       path.push_back(DELIM);
-       for (;;) {
-               for (; i != HROOT && history.tag(i) >= bound; i = history.pred(i)) {
-                       if (history.tag(i) == tag) {
-                               path.push_back(history.elem(i));
-                       }
-               }
-               if (i == HROOT) break;
-               ++nsub;
-               path.push_back(DELIM);
-               for (; i != HROOT && history.tag(i) != tag; i = history.pred(i));
+       for (; idx != HROOT; idx = history.pred(idx)) {
+               const ssize_t
+                       sign = history.elem(idx) < 0 ? -1 : 1,
+                       xtag = static_cast<ssize_t>(history.tag(idx));
+               path.push_back(sign * xtag);
        }
-
-       return nsub;
 }
 
-// Lookahead may consist of multiple subhistories (each containing either
-// a single bottom value, or one or more cursor values (exactly one for
-// non-orbit subhistories). Because of the shortest-path algorithm earlier
-// subhistories do not necessarily coincide, so comparing only the last
-// pair of subhistories is not enough. See note [POSIX orbit tags].
-int32_t tagtree_t::compare_histories(hidx_t x, hidx_t y,
-       tagver_t ox, tagver_t oy, size_t t)
+static inline int32_t unpack_longest(int32_t value)
 {
-       const int32_t
-               n1 = subhistory_list(*this, path1, x, t),
-               n2 = subhistory_list(*this, path2, y, t);
-
-       assert(n1 == n2);
-       path1.push_back(ox);
-       path2.push_back(oy);
-
-       std::vector<tagver_t>::const_reverse_iterator
-               i1 = path1.rbegin(), e1 = path1.rend(),
-               i2 = path2.rbegin(), e2 = path2.rend();
-       for (;;) {
-               if (i1 == e1 && i2 == e2) return 0;
-               assert(i1 != e1 && i2 != e2);
-               const tagver_t v1 = *i1++, v2 = *i2++;
-               if (v1 == DELIM && v2 == DELIM) continue;
-               if (v1 == DELIM) return -1;
-               if (v2 == DELIM) return 1;
-               if (v1 > v2) return -1;
-               if (v1 < v2) return 1;
-       }
+       // lower 30 bits
+       return value & 0x3fffFFFF;
 }
 
-static void last_subhistory(const tagtree_t &history, std::vector<tagver_t> &path,
-       hidx_t idx, tagver_t order, size_t tag)
+static inline int32_t unpack_leftmost(int32_t value)
 {
-       path.clear();
-       hidx_t i = idx;
-       const size_t bound = boundary_tag(tag);
-       for (; i != HROOT && history.tag(i) >= bound; i = history.pred(i)) {
-               if (history.tag(i) == tag) {
-                       path.push_back(history.elem(i));
-               }
-       }
-       if (i == HROOT) path.push_back(order);
+       // higher 2 bits
+       return value >> 30u;
 }
 
-int32_t tagtree_t::compare_last_subhistories(hidx_t x, hidx_t y,
-       tagver_t ox, tagver_t oy, size_t t)
+int tagtree_t::precedence(const clos_t &x, const clos_t &y, int &rhox, int &rhoy,
+       const bmatrix_t *bmatrix, const std::vector<Tag> &tags, size_t nclos)
 {
-       last_subhistory(*this, path1, x, ox, t);
-       last_subhistory(*this, path2, y, oy, t);
-       return compare_reversed(path1, path2);
-}
+       reconstruct_history(*this, path1, x.tlook);
+       reconstruct_history(*this, path2, y.tlook);
+       std::vector<ssize_t>::const_reverse_iterator
+               i1 = path1.rbegin(), e1 = path1.rend(), j1 = i1, g1,
+               i2 = path2.rbegin(), e2 = path2.rend(), j2 = i2, g2;
+       const size_t xi = x.index, yi = y.index;
+       const bool fork_frame = xi == yi;
+
+       // find fork
+       if (fork_frame) {
+               for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2);
+       }
 
-tagver_t tagtree_t::last(hidx_t i, size_t t) const
-{
-       for (; i != HROOT; i = pred(i)) {
-               if (tag(i) == t) return elem(i);
+       // longest precedence
+       if (!fork_frame) {
+               rhox = unpack_longest(bmatrix[xi * nclos + yi]);
+               rhoy = unpack_longest(bmatrix[yi * nclos + xi]);
+       } else {
+               rhox = rhoy = std::numeric_limits<int>::max();
+               if (j1 > i1) rhox = rhoy = height(*(j1 - 1), tags);
        }
-       return TAGVER_ZERO;
+       for (g1 = j1; g1 != e1; ++g1) {
+               rhox = std::min(rhox, height(*g1, tags));
+       }
+       for (g2 = j2; g2 != e2; ++g2) {
+               rhoy = std::min(rhoy, height(*g2, tags));
+       }
+       if (rhox > rhoy) return -1;
+       if (rhox < rhoy) return 1;
+
+       // leftmost precedence
+       if (!fork_frame) {
+               return unpack_leftmost(bmatrix[xi * nclos + yi]);
+       } else {
+               // equal => not less
+               if (j1 == e1 && j2 == e2) return 0;
+               // shorter => less
+               if (j1 == e1) return -1;
+               if (j2 == e2) return 1;
+               // closing => less
+               const ssize_t v1 = *j1, v2 = *j2;
+               if (v1 % 2 == 1) return -1;
+               if (v2 % 2 == 1) return 1;
+               // nonnegative => less
+               // (unless both are positive, which is only possible because
+               // multiple top-level RE don't have proper negative tags)
+               const int invert = v1 > 0 && v2 > 0 ? -1 : 1;
+               if (v1 > v2) return invert * -1;
+               if (v1 < v2) return invert * 1;
+       }
+       // unreachable
+       assert(false);
+       return 0;
 }
 
 } // namespace re2c
index 7f3bf4bb14eb2e07edc3d1d9c7422d27adf86414..f72ccff6ab5070797cdeafdf5bd9044c9536f468 100644 (file)
@@ -16,6 +16,9 @@ typedef uint32_t hidx_t;
 
 static const hidx_t HROOT = ~0u;
 
+struct clos_t;
+typedef int bmatrix_t;
+
 struct tagtree_t
 {
        // the whole tree of tags found by the epsilon-closure
@@ -28,18 +31,19 @@ struct tagtree_t
        std::vector<node_t> nodes;
 
        // reconstruct paths for comparison
-       std::vector<tagver_t> path1;
-       std::vector<tagver_t> path2;
+       std::vector<ssize_t> path1;
+       std::vector<ssize_t> path2;
 
        tagtree_t();
        hidx_t pred(hidx_t i) const;
        tagver_t elem(hidx_t i) const;
        size_t tag(hidx_t i) const;
        hidx_t push(hidx_t i, size_t t, tagver_t v);
-       int32_t compare_plain(hidx_t x, hidx_t y, size_t t);
-       int32_t compare_histories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t);
-       int32_t compare_last_subhistories(hidx_t x, hidx_t y, tagver_t ox, tagver_t oy, size_t t);
        tagver_t last(hidx_t i, size_t t) const;
+       int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const;
+       int32_t precedence(const clos_t &x, const clos_t &y, int &rhox, int &rhoy,
+               const bmatrix_t *bmatrix, const std::vector<Tag> &tags, size_t nclos);
+
        FORBID_COPY(tagtree_t);
 };
 
index 4d418b8bed30a4fe11fc80f4366e4dce5d2355e0..dd7fb9f3e043f868a5f84240d0508cff4c78b2d7 100644 (file)
@@ -20,8 +20,8 @@ void dump_nfa(const nfa_t &nfa)
        fprintf(stderr,
                "digraph NFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=Mrecord fontname=fixed height=0.2 width=0.2]\n"
-               "  edge[arrowhead=vee fontname=fixed label=\" \"]\n\n");
+               "  node[shape=Mrecord fontname=Courier height=0.2 width=0.2]\n"
+               "  edge[arrowhead=vee fontname=Courier label=\" \"]\n\n");
 
        for (uint32_t i = static_cast<uint32_t>(nfa.size); i --> 0;) {
                const nfa_state_t *n = &nfa.states[i];
@@ -63,6 +63,7 @@ void dump_nfa(const nfa_t &nfa)
                                } else {
                                        fprintf(stderr, "&uarr;");
                                }
+                               fprintf(stderr, "(%d)", tag.height);
                                fprintf(stderr, "\"]\n");
                                break;
                        }
index dbcd0d07a3f0f905def3bb51f31bffeaf6f0e715..944061a4a39c1581f93d52942c60b45368b33a3a 100644 (file)
@@ -100,13 +100,15 @@ static size_t fixlen(const AST *ast)
        return Tag::VARDIST; /* unreachable */
 }
 
-static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
+static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap, int depth)
 {
        RE::alc_t &alc = spec.alc;
        std::vector<Tag> &tags = spec.tags;
        const opt_t *opts = spec.opts;
        Warn &warn = spec.warn;
 
+       if (ast->type != AST::CAP && ast->type != AST::REF) ++depth;
+
        switch (ast->type) {
                case AST::NIL:
                        return re_nil(alc);
@@ -153,22 +155,34 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                        return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits()));
                case AST::ALT: {
                        RE *t1 = NULL, *t2 = NULL, *x, *y;
+                       RE *t3 = NULL, *t4 = NULL;
                        if (opts->posix_captures && has_tags(ast)
                                && ast->alt.ast1->type != AST::CAP) {
                                // see note [POSIX subexpression hierarchy]
                                t1 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(Tag::FICTIVE, false));
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
                                t2 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(Tag::FICTIVE, false));
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
                        }
-                       x = ast_to_re(spec, ast->alt.ast1, ncap);
+                       x = ast_to_re(spec, ast->alt.ast1, ncap, depth + 0);
                        x = re_cat(alc, t1, re_cat(alc, x, t2));
-                       y = ast_to_re(spec, ast->alt.ast2, ncap);
+
+                       if (opts->posix_captures && has_tags(ast)
+                               && ast->alt.ast2->type != AST::CAP) {
+                               // see note [POSIX subexpression hierarchy]
+                               t3 = re_tag(alc, tags.size(), false);
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
+                               t4 = re_tag(alc, tags.size(), false);
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
+                       }
+
+                       y = ast_to_re(spec, ast->alt.ast2, ncap, depth + 0);
+                       y = re_cat(alc, t3, re_cat(alc, y, t4));
                        return re_alt(alc, x, y);
                }
                case AST::DIFF: {
-                       RE *x = ast_to_re(spec, ast->diff.ast1, ncap);
-                       RE *y = ast_to_re(spec, ast->diff.ast2, ncap);
+                       RE *x = ast_to_re(spec, ast->diff.ast1, ncap, depth + 0);
+                       RE *y = ast_to_re(spec, ast->diff.ast2, ncap, depth + 0);
                        if (x->type != RE::SYM || y->type != RE::SYM) {
                                fatal_lc(ast->line, ast->column, "can only difference char sets");
                        }
@@ -176,18 +190,31 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                }
                case AST::CAT: {
                        RE *t1 = NULL, *t2 = NULL, *x, *y;
+                       RE *t3 = NULL, *t4 = NULL;
                        const AST *a1 = ast->alt.ast1;
                        if (opts->posix_captures && has_tags(ast)
-                               && a1->type != AST::CAP && fixlen(a1) == Tag::VARDIST) {
+                               && a1->type != AST::CAP) {
                                // see note [POSIX subexpression hierarchy]
                                t1 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(Tag::FICTIVE, false));
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
                                t2 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(Tag::FICTIVE, false));
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
                        }
-                       x = ast_to_re(spec, ast->cat.ast1, ncap);
+                       x = ast_to_re(spec, ast->cat.ast1, ncap, depth + 0);
                        x = re_cat(alc, t1, re_cat(alc, x, t2));
-                       y = ast_to_re(spec, ast->cat.ast2, ncap);
+
+                       const AST *a2 = ast->alt.ast2;
+                       if (opts->posix_captures && has_tags(ast)
+                               && a2->type != AST::CAP) {
+                               // see note [POSIX subexpression hierarchy]
+                               t3 = re_tag(alc, tags.size(), false);
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
+                               t4 = re_tag(alc, tags.size(), false);
+                               tags.push_back(Tag(Tag::FICTIVE, false, depth));
+                       }
+
+                       y = ast_to_re(spec, ast->cat.ast2, ncap, depth + 0);
+                       y = re_cat(alc, t3, re_cat(alc, y, t4));
                        return re_cat(alc, x, y);
                }
                case AST::TAG: {
@@ -200,28 +227,28 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                                        "simple tags are not allowed with '--posix-captures' option");
                        }
                        RE *t = re_tag(alc, tags.size(), false);
-                       tags.push_back(Tag(ast->tag.name, ast->tag.history));
+                       tags.push_back(Tag(ast->tag.name, ast->tag.history, depth));
                        return t;
                }
                case AST::CAP: {
                        if (!opts->posix_captures) {
-                               return ast_to_re(spec, ast->cap, ncap);
+                               return ast_to_re(spec, ast->cap, ncap, depth);
                        }
                        const AST *x = ast->cap;
                        if (x->type == AST::REF) x = x->ref.ast;
 
                        RE *t1 = re_tag(alc, tags.size(), false);
-                       tags.push_back(Tag(2 * ncap, false));
+                       tags.push_back(Tag(2 * ncap, false, depth));
 
                        RE *t2 = re_tag(alc, tags.size(), false);
-                       tags.push_back(Tag(2 * ncap + 1, false));
+                       tags.push_back(Tag(2 * ncap + 1, false, depth));
 
                        ++ncap;
-                       return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap), t2));
+                       return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, depth), t2));
                }
                case AST::REF:
                        if (!opts->posix_captures) {
-                               return ast_to_re(spec, ast->ref.ast, ncap);
+                               return ast_to_re(spec, ast->ref.ast, ncap, depth);
                        }
                        fatal_l(ast->line,
                                "implicit grouping is forbidden with '--posix-captures'"
@@ -241,10 +268,10 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                                if (x->type == AST::REF) x = x->ref.ast;
 
                                t1 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(2 * ncap, m > 1));
+                               tags.push_back(Tag(2 * ncap, m > 1, depth));
 
                                t2 = re_tag(alc, tags.size(), false);
-                               tags.push_back(Tag(2 * ncap + 1, false));
+                               tags.push_back(Tag(2 * ncap + 1, m > 1, depth));
 
                                ++ncap;
                        }
@@ -253,13 +280,13 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap)
                        if (m == 0) {
                                y = re_cat(alc, t1, t2);
                        } else if (m == 1) {
-                               y = ast_to_re(spec, x, ncap);
+                               y = ast_to_re(spec, x, ncap, depth + 0);
                                y = re_cat(alc, t1, re_cat(alc, y, t2));
                        } else  {
-                               y = ast_to_re(spec, x, ncap);
+                               y = ast_to_re(spec, x, ncap, depth + 0);
                                y = re_cat(alc, t1, y);
-                               y = re_iter(alc, y, n1, m);
                                y = re_cat(alc, y, t2);
+                               y = re_iter(alc, y, n1, m);
                        }
                        if (n == 0) {
                                y = re_alt(alc, y, re_nil(alc));
@@ -365,7 +392,7 @@ RESpec::RESpec(const std::vector<ASTRule> &ast, const opt_t *o, Warn &w)
 {
        for (size_t i = 0; i < ast.size(); ++i) {
                size_t ltag = tags.size(), ncap = 0;
-               res.push_back(ast_to_re(*this, ast[i].ast, ncap));
+               res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0));
                init_rule(rules[i], ast[i].code, tags, ltag, ncap);
        }
 }
index 226e7058ffb0a2ec27bd7d42dc90a271043ea9d8..26ceb7e4149326c987ac26d0364ebd29ac3cbe6e 100644 (file)
@@ -17,7 +17,7 @@ static void insert_default_tags(RESpec &spec, RE *re, size_t *&tidx)
                case RE::SYM: break;
                case RE::ALT: {
                        size_t *i = tidx;
-                       RE *x = re_nil(alc), *y = re_nil(alc);
+                       RE *x = NULL, *y = NULL;
                        insert_default_tags(spec, re->alt.re1, tidx);
                        for (; i < tidx; ++i) {
                                x = re_cat(alc, x, re_tag(alc, *i, true));
@@ -27,7 +27,9 @@ static void insert_default_tags(RESpec &spec, RE *re, size_t *&tidx)
                                y = re_cat(alc, y, re_tag(alc, *i, true));
                        }
                        re->alt.re1 = re_cat(alc, re->alt.re1, y);
-                       re->alt.re2 = re_cat(alc, re->alt.re2, x);
+                       re->alt.re2 = spec.opts->posix_captures
+                               ? re_cat(alc, x, re->alt.re2)
+                               : re_cat(alc, re->alt.re2, x);
                        break;
                }
                case RE::CAT:
index 9a9a05e4aee71feb1d313f94418d555bd6c25553..7b3febdcb1e0f0fa393d0ad5575a31761322fdb6 100644 (file)
@@ -28,22 +28,25 @@ struct Tag
        size_t dist;
        bool history;
        bool orbit;
+       int height;
 
-       Tag(const std::string *n, bool h)
+       Tag(const std::string *n, bool h, int he)
                : name(n)
                , ncap(Tag::RIGHTMOST)
                , base(Tag::RIGHTMOST)
                , dist(Tag::VARDIST)
                , history(h)
                , orbit(false)
+               , height(he)
        {}
-       Tag(size_t c, bool o)
+       Tag(size_t c, bool o, int he)
                : name(NULL)
                , ncap(c)
                , base(Tag::RIGHTMOST)
                , dist(Tag::VARDIST)
                , history(false)
                , orbit(o)
+               , height(he)
        {}
 };