granicus.if.org Git - re2c/commitdiff
Gathered all determinization-related data in a struct to avoid passing many parameters.
author Ulya Trofimovich <skvadrik@gmail.com>
Sun, 12 Aug 2018 19:38:11 +0000 (20:38 +0100)
committer Ulya Trofimovich <skvadrik@gmail.com>
Sun, 12 Aug 2018 20:07:12 +0000 (21:07 +0100)
12 files changed:
re2c/Makefile.am
re2c/src/dfa/closure.cc
re2c/src/dfa/closure.h
re2c/src/dfa/determinization.cc
re2c/src/dfa/determinization.h [new file with mode: 0644]
re2c/src/dfa/dfa.h
re2c/src/dfa/dump.cc
re2c/src/dfa/dump.h
re2c/src/dfa/find_state.cc
re2c/src/dfa/find_state.h
re2c/src/dfa/tagtree.cc
re2c/src/dfa/tagtree.h

index 378fb7673eecdf07ab0d8dd85e1efd37b34a01e6..f1479dadbf4b357878a9652c3ce108157079564d 100644 (file)
@@ -24,6 +24,7 @@ SRC_HDR = \
        src/adfa/dump.h \
        src/dfa/cfg/cfg.h \
        src/dfa/closure.h \
+       src/dfa/determinization.h \
        src/dfa/dfa.h \
        src/dfa/dump.h \
        src/dfa/find_state.h \
index 13db4da8e595aa19e83ebf9fed69c786336cb31e..5323bc84976a45d476558680af768d4c1f300e36 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "src/conf/opt.h"
 #include "src/dfa/closure.h"
+#include "src/dfa/determinization.h"
 #include "src/dfa/dfa.h"
 #include "src/dfa/tagpool.h"
 #include "src/dfa/tcmd.h"
@@ -76,42 +77,40 @@ namespace re2c
  */
 
 
-static void closure_posix(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags, const prectable_t *prectbl, size_t noldclos);
-static void closure_leftmost(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool);
-static void prune(closure_t &clos, std::valarray<Rule> &rules);
-static void lower_lookahead_to_transition(closure_t &clos);
-static tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers);
-static void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags,
-       const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos);
-static bool cmpby_rule_state(const clos_t &x, const clos_t &y);
+static void closure_posix(determ_context_t &);
+static nfa_state_t *relax(determ_context_t &, clos_t);
+static nfa_state_t *explore(determ_context_t &, nfa_state_t *);
+static void closure_leftmost(determ_context_t &);
+static void prune(closure_t &, std::valarray<Rule> &);
+static void lower_lookahead_to_transition(closure_t &);
+static void generate_versions(determ_context_t &);
+static void orders(determ_context_t &);
+static bool cmpby_rule_state(const clos_t &, const clos_t &);
+static int32_t pack(int32_t, int32_t);
 
 
-tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
-       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow,
-       const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos)
+void tagged_epsilon_closure(determ_context_t &ctx)
 {
+       closure_t &closure = ctx.dc_closure;
+
        // build tagged epsilon-closure of the given set of NFA states
-       if (tagpool.opts->posix_captures) {
-               closure_posix(clos1, clos2, shadow, tagpool, dfa.tags, prectbl_old, noldclos);
-               prune(clos2, dfa.rules);
-               std::sort(clos2.begin(), clos2.end(), cmpby_rule_state);
-               orders(clos2, tagpool, dfa.tags, prectbl_old, prectbl_new, noldclos);
+       if (ctx.dc_opts->posix_captures) {
+               closure_posix(ctx);
+               prune(closure, ctx.dc_nfa.rules);
+               std::sort(closure.begin(), closure.end(), cmpby_rule_state);
+               orders(ctx);
        } else {
-               closure_leftmost(clos1, clos2, shadow, tagpool);
-               prune(clos2, dfa.rules);
+               closure_leftmost(ctx);
+               prune(closure, ctx.dc_nfa.rules);
        }
 
        // see note [the difference between TDFA(0) and TDFA(1)]
-       if (!tagpool.opts->lookahead) {
-               lower_lookahead_to_transition(clos2);
-               if (shadow) lower_lookahead_to_transition(*shadow);
+       if (!ctx.dc_opts->lookahead) {
+               lower_lookahead_to_transition(closure);
        }
 
        // merge tags from different rules, find nondeterministic tags
-       tcmd_t *cmd = generate_versions(dfa, clos2, tagpool, newvers);
-       if (shadow) generate_versions(dfa, *shadow, tagpool, newvers);
-
-       return cmd;
+       generate_versions(ctx);
 }
 
 
@@ -128,74 +127,73 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y)
 }
 
 
-static nfa_state_t *relax(clos_t x, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
-       const prectable_t *prectbl, size_t noldclos)
+nfa_state_t *relax(determ_context_t &ctx, clos_t x)
 {
+       closure_t &done = ctx.dc_closure;
        nfa_state_t *q = x.state;
-       uint32_t &i = q->clos;
+       const uint32_t idx = q->clos;
+       int32_t h1, h2;
 
        // first time we see this state
-       if (i == NOCLOS) {
-               i = static_cast<uint32_t>(done.size());
+       if (idx == NOCLOS) {
+               q->clos = static_cast<uint32_t>(done.size());
                done.push_back(x);
        }
+
        // States of in-degree less than 2 are not joint points;
        // the fact that we are re-scanning this state means that we found
        // a better path to some previous state. Due to the right distributivity
        // of path comparison over path concatenation (X < Y => XZ < YZ) we
        // can just propagate the new path up to the next join point.
        else if (q->indeg < 2) {
-               std::swap(x, done[i]);
-               if (shadow) shadow->push_back(x);
+               done[idx] = x;
        }
+
        // join point; compare the new path and the old path
+       else if (precedence(ctx, x, done[idx], h1, h2) < 0) {
+               done[idx] = x;
+       }
+
+       // the previous path was better, discard the new one
        else {
-               clos_t &y = done[i];
-               int h1, h2, l;
-               l = tagpool.history.precedence (x, y, h1, h2, prectbl, tags, noldclos);
-               if (l < 0) std::swap(x, y);
-               if (shadow && l != 0) shadow->push_back(x);
-               if (l >= 0) q = NULL;
+               q = NULL;
        }
 
        return q;
 }
 
 
-static nfa_state_t *explore(nfa_state_t *q, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
-       const prectable_t *prectbl, size_t noldclos)
+nfa_state_t *explore(determ_context_t &ctx, nfa_state_t *q)
 {
        // find the next admissible transition, adjust the index
        // of the next transition and return the to-state
        nfa_state_t *p = NULL;
-       clos_t x = done[q->clos];
+       clos_t x = ctx.dc_closure[q->clos];
        switch (q->type) {
                case nfa_state_t::NIL:
                        if (q->arcidx == 0) {
                                x.state = q->nil.out;
-                               p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos);
+                               p = relax(ctx, x);
                                ++q->arcidx;
                        }
                        break;
                case nfa_state_t::ALT:
                        if (q->arcidx == 0) {
                                x.state = q->alt.out1;
-                               p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos);
+                               p = relax(ctx, x);
                                ++q->arcidx;
                        }
                        if (q->arcidx == 1 && !p) {
                                x.state = q->alt.out2;
-                               p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos);
+                               p = relax(ctx, x);
                                ++q->arcidx;
                        }
                        break;
                case nfa_state_t::TAG:
                        if (q->arcidx == 0) {
                                x.state = q->tag.out;
-                               x.tlook = tagpool.history.push(x.tlook, q->tag.info);
-                               p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos);
+                               x.tlook = ctx.dc_tagtrie.push(x.tlook, q->tag.info);
+                               p = relax(ctx, x);
                                ++q->arcidx;
                        }
                        break;
@@ -207,21 +205,20 @@ static nfa_state_t *explore(nfa_state_t *q, closure_t &done,
 }
 
 
-void closure_posix(const closure_t &init, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool, const std::vector<Tag> &tags,
-       const prectable_t *prectbl, size_t noldclos)
+void closure_posix(determ_context_t &ctx)
 {
+       const closure_t &init = ctx.dc_reached;
+       closure_t &done = ctx.dc_closure;
        std::stack<nfa_state_t*>
-               &topsort = tagpool.astack,
-               &linear = tagpool.bstack;
+               &topsort = ctx.dc_tagpool.astack,
+               &linear = ctx.dc_tagpool.bstack;
        nfa_state_t *q, *p;
 
        done.clear();
-       if (shadow) shadow->clear();
 
        // enqueue all initial states (there might be duplicates)
        for (cclositer_t c = init.begin(); c != init.end(); ++c) {
-               q = relax(*c, done, shadow, tagpool, tags, prectbl, noldclos);
+               q = relax(ctx, *c);
                if (q) {
                        topsort.push(q);
                        q->status = GOR_TOPSORT;
@@ -246,7 +243,7 @@ void closure_posix(const closure_t &init, closure_t &done,
                                q->status = GOR_TOPSORT;
 
                                // find next admissible transition
-                               while ((p = explore(q, done, shadow, tagpool, tags, prectbl, noldclos))
+                               while ((p = explore(ctx, q))
                                        && p->status != GOR_NOPASS) {
                                        p->active = 1;
                                }
@@ -274,7 +271,7 @@ void closure_posix(const closure_t &init, closure_t &done,
                        if (q->active) {
                                // scan admissible transitions
                                q->arcidx = 0;
-                               while ((p = explore(q, done, shadow, tagpool, tags, prectbl, noldclos))) {
+                               while ((p = explore(ctx, q))) {
                                        if (p->status == GOR_NOPASS) {
                                                topsort.push(p);
                                                p->arcidx = 0;
@@ -287,7 +284,6 @@ void closure_posix(const closure_t &init, closure_t &done,
 
                        q->status = GOR_NOPASS;
                        q->active = 0;
-                       q->arcidx = 0;
                }
        }
 
@@ -295,19 +291,20 @@ void closure_posix(const closure_t &init, closure_t &done,
        for (clositer_t i = done.begin(); i != done.end(); ++i) {
                q = i->state;
                q->clos = NOCLOS;
-               assert(q->status == GOR_NOPASS && q->active == 0 && q->arcidx == 0);
+               q->arcidx = 0;
+               assert(q->status == GOR_NOPASS && q->active == 0);
        }
 }
 
 
-void closure_leftmost(const closure_t &init, closure_t &done,
-       closure_t *shadow, Tagpool &tagpool)
+void closure_leftmost(determ_context_t &ctx)
 {
-       std::stack<clos_t> &todo = tagpool.cstack;
+       const closure_t &init = ctx.dc_reached;
+       closure_t &done = ctx.dc_closure;
+       std::stack<clos_t> &todo = ctx.dc_tagpool.cstack;
 
        // enqueue all initial states
        done.clear();
-       if (shadow) shadow->clear();
        for (rcclositer_t c = init.rbegin(); c != init.rend(); ++c) {
                todo.push(*c);
        }
@@ -321,30 +318,27 @@ void closure_leftmost(const closure_t &init, closure_t &done,
                if (n->clos == NOCLOS) {
                        n->clos = static_cast<uint32_t>(done.size());
                        done.push_back(x);
-               } else {
-                       if (shadow) shadow->push_back(x);
-                       continue;
-               }
 
-               switch (n->type) {
-                       case nfa_state_t::NIL:
-                               x.state = n->nil.out;
-                               todo.push(x);
-                               break;
-                       case nfa_state_t::ALT:
-                               x.state = n->alt.out2;
-                               todo.push(x);
-                               x.state = n->alt.out1;
-                               todo.push(x);
-                               break;
-                       case nfa_state_t::TAG:
-                               x.state = n->tag.out;
-                               x.tlook = tagpool.history.push(x.tlook, n->tag.info);
-                               todo.push(x);
-                               break;
-                       case nfa_state_t::RAN:
-                       case nfa_state_t::FIN:
-                               break;
+                       switch (n->type) {
+                               case nfa_state_t::NIL:
+                                       x.state = n->nil.out;
+                                       todo.push(x);
+                                       break;
+                               case nfa_state_t::ALT:
+                                       x.state = n->alt.out2;
+                                       todo.push(x);
+                                       x.state = n->alt.out1;
+                                       todo.push(x);
+                                       break;
+                               case nfa_state_t::TAG:
+                                       x.state = n->tag.out;
+                                       x.tlook = ctx.dc_tagtrie.push(x.tlook, n->tag.info);
+                                       todo.push(x);
+                                       break;
+                               case nfa_state_t::RAN:
+                               case nfa_state_t::FIN:
+                                       break;
+                       }
                }
        }
 
@@ -356,9 +350,9 @@ void closure_leftmost(const closure_t &init, closure_t &done,
 }
 
 
-void prune(closure_t &clos, std::valarray<Rule> &rules)
+void prune(closure_t &closure, std::valarray<Rule> &rules)
 {
-       clositer_t b = clos.begin(), e = clos.end(), i, j;
+       clositer_t b = closure.begin(), e = closure.end(), i, j;
 
        // drop "inner" states (non-final without outgoing non-epsilon transitions)
        j = std::stable_partition(b, e, clos_t::ran);
@@ -376,29 +370,35 @@ void prune(closure_t &clos, std::valarray<Rule> &rules)
                n = static_cast<size_t>(j - b) + 1;
        }
 
-       clos.resize(n);
+       closure.resize(n);
 }
 
 
-void lower_lookahead_to_transition(closure_t &clos)
+void lower_lookahead_to_transition(closure_t &closure)
 {
-       for (clositer_t c = clos.begin(); c != clos.end(); ++c) {
+       for (clositer_t c = closure.begin(); c != closure.end(); ++c) {
                c->ttran = c->tlook;
                c->tlook = HROOT;
        }
 }
 
 
-tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers)
+void generate_versions(determ_context_t &ctx)
 {
-       tcmd_t *cmd = NULL;
-       const size_t ntag = tagpool.ntags;
-       tagver_t *vers = tagpool.buffer, &maxver = dfa.maxtagver;
-       tagtree_t &tagtree = tagpool.history;
+       dfa_t &dfa = ctx.dc_dfa;
        const std::vector<Tag> &tags = dfa.tags;
+       const size_t ntag = tags.size();
+       tagver_t &maxver = dfa.maxtagver;
+       Tagpool &tagpool = ctx.dc_tagpool;
+       tagver_t *vers = tagpool.buffer;
+       closure_t &clos = ctx.dc_closure;
+       tagtree_t &tagtree = ctx.dc_tagtrie;
+       newvers_t &newvers = ctx.dc_newvers;
+
        clositer_t b = clos.begin(), e = clos.end(), c;
-       newver_cmp_t cmp = {tagtree};
+       newver_cmp_t cmp(tagtree);
        newvers_t newacts(cmp);
+       tcmd_t *cmd = NULL;
 
        // for each tag, if there is at least one tagged transition,
        // allocate new version (negative for bottom and positive for
@@ -468,32 +468,35 @@ tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers
                c->tvers = tagpool.insert(vers);
        }
 
-       return cmd;
+       ctx.dc_actions = cmd;
 }
 
 
-static inline int32_t pack(int32_t longest, int32_t leftmost)
+int32_t pack(int32_t longest, int32_t leftmost)
 {
        // leftmost: higher 2 bits, longest: lower 30 bits
        return longest | (leftmost << 30);
 }
 
 
-void orders(closure_t &clos, Tagpool &tagpool, const std::vector<Tag> &tags,
-       const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos)
+void orders(determ_context_t &ctx)
 {
-       const size_t nclos = clos.size();
-       prectbl_new = tagpool.alc.alloct<prectable_t>(nclos * nclos);
+       closure_t &closure = ctx.dc_closure;
+       const size_t nclos = closure.size();
+
+       prectable_t *prectbl = ctx.dc_allocator.alloct<prectable_t>(nclos * nclos);
 
        for (size_t i = 0; i < nclos; ++i) {
                for (size_t j = i + 1; j < nclos; ++j) {
                        int32_t rho1, rho2, l;
-                       l = tagpool.history.precedence (clos[i], clos[j], rho1, rho2, prectbl_old, tags, noldclos);
-                       prectbl_new[i * nclos + j] = pack(rho1, l);
-                       prectbl_new[j * nclos + i] = pack(rho2, -l);
+                       l = precedence (ctx, closure[i], closure[j], rho1, rho2);
+                       prectbl[i * nclos + j] = pack(rho1, l);
+                       prectbl[j * nclos + i] = pack(rho2, -l);
                }
-               prectbl_new[i * nclos + i] = 0;
+               prectbl[i * nclos + i] = 0;
        }
+
+       ctx.dc_prectbl = prectbl;
 }
 
 } // namespace re2c
index 1313a89f529c9566a9194d31197c8b0e2b9ca906..4e75e12c31299f1fb402c71ec56498cbdd7034b1 100644 (file)
@@ -23,10 +23,10 @@ typedef slab_allocator_t<> allocator_t;
 struct clos_t
 {
        nfa_state_t *state;
+       uint32_t origin;
        uint32_t tvers; // vector of tag versions (including lookahead tags)
        hidx_t ttran; // history of transition tags
        hidx_t tlook; // history of lookahead tags
-       uint32_t origin;
 
        static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
        static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
@@ -48,6 +48,9 @@ struct newver_t
 struct newver_cmp_t
 {
        tagtree_t &history;
+
+       explicit newver_cmp_t(tagtree_t &h) : history(h) {}
+
        bool operator()(const newver_t &x, const newver_t &y) const
        {
                if (x.tag < y.tag) return true;
@@ -62,10 +65,6 @@ struct newver_cmp_t
 
 typedef std::map<newver_t, tagver_t, newver_cmp_t> newvers_t;
 
-tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2,
-       Tagpool &tagpool, newvers_t &newvers, closure_t *shadow,
-       const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos);
-
 } // namespace re2c
 
 #endif // _RE2C_DFA_CLOSURE_
index 78211fa2e126dbf48add3a27b5bfd019312e5579..e7072b912fca59cfe9f2fb306a71d95a68ac20d5 100644 (file)
@@ -12,6 +12,7 @@
 #include "src/conf/warn.h"
 #include "src/dfa/closure.h"
 #include "src/dfa/dfa.h"
+#include "src/dfa/determinization.h"
 #include "src/dfa/dump.h"
 #include "src/dfa/find_state.h"
 #include "src/dfa/tagpool.h"
 namespace re2c
 {
 
-static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol);
-static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol);
-static void warn_nondeterministic_tags(const kernels_t &kernels,
-       const Tagpool &tagpool, const std::vector<Tag> &tags,
-       const std::valarray<Rule> &rules, const std::string &cond, Warn &warn);
+static nfa_state_t *transition(nfa_state_t *, uint32_t);
+static void reach_on_symbol(determ_context_t &);
+static void warn_nondeterministic_tags(const determ_context_t &);
 
 
-const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
+const uint32_t dfa_t::NIL = ~0u;
 
 
 nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
@@ -49,21 +48,59 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol)
 }
 
 
-void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol)
+void reach_on_symbol(determ_context_t &ctx)
 {
-       clos.clear();
+       const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin];
+       closure_t &reached = ctx.dc_reached;
+       const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol];
+
+       reached.clear();
        for (uint32_t i = 0; i < kernel->size; ++i) {
                nfa_state_t *s = transition(kernel->state[i], symbol);
                if (s) {
-                       clos_t c = {s, kernel->tvers[i], kernel->tlook[i], HROOT, i};
-                       clos.push_back(c);
+                       clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT};
+                       reached.push_back(c);
+               }
+       }
+}
+
+
+static uint32_t init_tag_versions(determ_context_t &ctx)
+{
+       dfa_t &dfa = ctx.dc_dfa;
+       const size_t ntags = dfa.tags.size();
+
+       // all-zero tag configuration must have static number zero
+       assert(ZERO_TAGS == ctx.dc_tagpool.insert_const(TAGVER_ZERO));
+
+       // initial tag versions: [1 .. N]
+       const uint32_t INITIAL_TAGS = ctx.dc_tagpool.insert_succ(1);
+
+       // other versions: [ .. -(N + 1)] and [N + 1 .. ]
+       dfa.maxtagver = static_cast<tagver_t>(ntags);
+
+       // final/fallback versions will be assigned on the go
+       dfa.finvers = new tagver_t[ntags];
+       for (size_t i = 0; i < ntags; ++i) {
+               dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver;
+       }
+
+       // mark tags with history (initial and final)
+       for (size_t i = 0; i < ntags; ++i) {
+               if (history(dfa.tags[i])) {
+                       tagver_t v = static_cast<tagver_t>(i) + 1, f = dfa.finvers[i];
+                       if (f != TAGVER_ZERO) {
+                               dfa.mtagvers.insert(f);
+                       }
+                       dfa.mtagvers.insert(v);
                }
        }
+
+       return INITIAL_TAGS;
 }
 
 
-dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
-       const std::string &cond, Warn &warn)
+dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn)
        : states()
        , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges
        , charset(nfa.charset)
@@ -76,60 +113,34 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
        , tcmd0(NULL)
        , tcid0(TCID0)
 {
-       const size_t ntag = tags.size();
-       Tagpool tagpool(opts, ntag);
-       kernels_t kernels(tagpool, tcpool, tags);
-       closure_t clos1, clos2;
-       newver_cmp_t newvers_cmp = {tagpool.history};
-       newvers_t newvers(newvers_cmp);
-       tcmd_t *acts;
-       dump_dfa_t dump(*this, tagpool, nfa);
-       prectable_t *prectbl = NULL;
+       determ_context_t ctx(opts, warn, cond, nfa, *this);
 
-       // all-zero tag configuration must have static number zero
-       assert(ZERO_TAGS == tagpool.insert_const(TAGVER_ZERO));
-       // initial tag versions: [1 .. N]
-       const size_t INITIAL_TAGS = tagpool.insert_succ(1);
-       // other versions: [ .. -(N + 1)] and [N + 1 .. ]
-       maxtagver = static_cast<tagver_t>(ntag);
+       const uint32_t INITIAL_TAGS = init_tag_versions(ctx);
 
-       // final/fallback versions will be assigned on the go
-       finvers = new tagver_t[ntag];
-       for (size_t i = 0; i < ntag; ++i) {
-               finvers[i] = fixed(tags[i]) ? TAGVER_ZERO : ++maxtagver;
-       }
-
-       // mark tags with history (initial and final)
-       for (size_t i = 0; i < ntag; ++i) {
-               if (history(tags[i])) {
-                       tagver_t v = static_cast<tagver_t>(i) + 1, f = finvers[i];
-                       if (f != TAGVER_ZERO) mtagvers.insert(f);
-                       mtagvers.insert(v);
-               }
-       }
+       // initial state
+       const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT};
+       ctx.dc_reached.push_back(c0);
+       tagged_epsilon_closure(ctx);
+       find_state(ctx);
 
        // iterate while new kernels are added: for each alphabet symbol,
        // build tagged epsilon-closure of all reachable NFA states,
        // then find identical or mappable DFA state or add a new one
+       for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) {
+
+               ctx.dc_origin = i;
+               ctx.dc_newvers.clear();
 
-       clos_t c0 = {nfa.root, INITIAL_TAGS, HROOT, HROOT, 0};
-       clos1.push_back(c0);
-       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, NULL, prectbl, 0);
-       find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump, prectbl);
-
-       for (size_t i = 0; i < kernels.size(); ++i) {
-               newvers.clear();
-               for (size_t c = 0; c < nchars; ++c) {
-                       const kernel_t *kernel = kernels[i];
-                       reach(kernel, clos1, charset[c]);
-                       acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, kernel->prectbl, prectbl, kernel->size);
-                       find_state(*this, i, c, kernels, clos2, acts, dump, prectbl);
+               for (uint32_t c = 0; c < nchars; ++c) {
+                       ctx.dc_symbol = c;
+
+                       reach_on_symbol(ctx);
+                       tagged_epsilon_closure(ctx);
+                       find_state(ctx);
                }
        }
 
-       if (!opts->posix_captures) {
-               warn_nondeterministic_tags(kernels, tagpool, tags, rules, cond, warn);
-       }
+       warn_nondeterministic_tags(ctx);
 }
 
 
@@ -137,17 +148,23 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts,
 // used in each kernel (degree of non-determinism) and warn about tags with
 // maximum degree two or more.
 // WARNING: this function assumes that kernel items are grouped by rule
-void warn_nondeterministic_tags(const kernels_t &kernels,
-       const Tagpool &tagpool, const std::vector<Tag> &tags,
-       const std::valarray<Rule> &rules, const std::string &cond, Warn &warn)
+void warn_nondeterministic_tags(const determ_context_t &ctx)
 {
+       if (ctx.dc_opts->posix_captures) return;
+
+       Warn &warn = ctx.dc_warn;
+       const kernels_t &kernels = ctx.dc_kernels;
+       const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+       const std::valarray<Rule> &rules = ctx.dc_dfa.rules;
+
        const size_t
-               ntag = tagpool.ntags,
-               nkrn = kernels.size();
+               ntag = tags.size(),
+               nkrn = kernels.size(),
+               nrule = rules.size();
        std::vector<size_t> maxv(ntag, 0);
        std::set<tagver_t> uniq;
 
-       for (size_t i = 0; i < nkrn; ++i) {
+       for (uint32_t i = 0; i < nkrn; ++i) {
                const kernel_t *k = kernels[i];
                nfa_state_t **s = k->state;
                const size_t n = k->size;
@@ -162,27 +179,50 @@ void warn_nondeterministic_tags(const kernels_t &kernels,
                        for (size_t t = rule.ltag; t < rule.htag; ++t) {
                                uniq.clear();
                                for (size_t m = l; m < u; ++m) {
-                                       uniq.insert(tagpool[v[m]][t]);
+                                       uniq.insert(ctx.dc_tagpool[v[m]][t]);
                                }
                                maxv[t] = std::max(maxv[t], uniq.size());
                        }
                }
        }
 
-       const size_t nrule = rules.size();
-       for (size_t r = 0; r < nrule; ++r) {
+       for (uint32_t r = 0; r < nrule; ++r) {
                const Rule &rule = rules[r];
                for (size_t t = rule.ltag; t < rule.htag; ++t) {
                        const size_t m = maxv[t];
                        if (m > 1) {
                                const uint32_t line = rule.code->fline;
-                               warn.nondeterministic_tags(line, cond, tags[t].name, m);
+                               warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m);
                        }
                }
        }
 }
 
 
+determ_context_t::determ_context_t(const opt_t *opts, Warn &warn
+       , const std::string &condname, const nfa_t &nfa, dfa_t &dfa)
+       : dc_opts(opts)
+       , dc_warn(warn)
+       , dc_condname(condname)
+       , dc_nfa(nfa)
+       , dc_dfa(dfa)
+       , dc_origin(dfa_t::NIL)
+       , dc_target(dfa_t::NIL)
+       , dc_symbol(0)
+       , dc_actions(NULL)
+       , dc_reached()
+       , dc_closure()
+       , dc_prectbl(NULL)
+       , dc_tagpool(opts, nfa.tags.size())
+       , dc_allocator(dc_tagpool.alc)
+       , dc_tagtrie(dc_tagpool.history)
+       , dc_kernels()
+       , dc_buffers(dc_allocator)
+       , dc_newvers(newver_cmp_t(dc_tagtrie))
+       , dc_dump(opts)
+{}
+
+
 dfa_t::~dfa_t()
 {
        std::vector<dfa_state_t*>::iterator
diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h
new file mode 100644 (file)
index 0000000..4ac2a50
--- /dev/null
@@ -0,0 +1,64 @@
+#ifndef _RE2C_DFA_DETERMINIZATION_
+#define _RE2C_DFA_DETERMINIZATION_
+
+#include "src/util/c99_stdint.h"
+#include <string>
+
+#include "src/dfa/closure.h"
+#include "src/dfa/dump.h"
+#include "src/dfa/tagpool.h"
+#include "src/dfa/tagtree.h"
+#include "src/dfa/find_state.h"
+#include "src/util/forbid_copy.h"
+
+namespace re2c
+{
+
+// fwd
+struct opt_t;
+struct Warn;
+struct nfa_t;
+struct dfa_t;
+
+struct determ_context_t
+{
+       // determinization input
+       const opt_t        *dc_opts;      // options
+       Warn               &dc_warn;      // warnings
+       const std::string  &dc_condname;  // the name of current condition (with -c)
+       const nfa_t        &dc_nfa;       // TNFA
+
+       // determinization output
+       dfa_t              &dc_dfa;       // resulting TDFA
+//     tcpool_t           &dc_tcmdpool;  // pool of tag actions
+//     uint32_t            dc_maxtagver; // maximal tag version
+//     tagver_t           *dc_finvers;   // tag versions used in final states
+//     std::set<tagver_t> &dc_mtagvers;  // the set of m-tags
+
+       // temporary structures used by determinization
+       uint32_t            dc_origin;    // from-state of the current transition
+       uint32_t            dc_target;    // to-state of the current transition
+       uint32_t            dc_symbol;    // alphabet symbol of the current transition
+       tcmd_t             *dc_actions;   // tag actions of the current transition
+       closure_t           dc_reached;
+       closure_t           dc_closure;
+       prectable_t        *dc_prectbl;   // precedence table for Okui POSIX disambiguation
+       Tagpool             dc_tagpool;
+       allocator_t        &dc_allocator;
+       tagtree_t          &dc_tagtrie;   // prefix trie of tag histories
+       kernels_t           dc_kernels;   // TDFA states under construction
+       kernel_buffers_t    dc_buffers;
+       newvers_t           dc_newvers;
+       dump_dfa_t          dc_dump;
+
+       determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &);
+       FORBID_COPY(determ_context_t);
+};
+
+void tagged_epsilon_closure(determ_context_t &ctx);
+void find_state(determ_context_t &ctx);
+int32_t precedence(determ_context_t &, const clos_t &, const clos_t &, int32_t &, int32_t &);
+
+} // namespace re2c
+
+#endif // _RE2C_DFA_DETERMINIZATION_
index 2135623f0ff8d32d1d5b07ec16c540f14249eadc..a6a17ba1e552937df9e93c959975f027a4ac1914 100644 (file)
@@ -47,7 +47,7 @@ struct dfa_state_t
 
 struct dfa_t
 {
-       static const size_t NIL;
+       static const uint32_t NIL;
 
        std::vector<dfa_state_t*> states;
        const size_t nchars;
index cf74a9277425c959d70acd1fdcb540903d1fe893..bf208d103d691aa1aaca6fb7db08620ac6628222 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "src/conf/opt.h"
 #include "src/dfa/dfa.h"
+#include "src/dfa/determinization.h"
 #include "src/dfa/dump.h"
 #include "src/dfa/find_state.h"
 #include "src/dfa/tagpool.h"
@@ -20,24 +21,19 @@ namespace re2c
 
 static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sym, const tcpool_t &tcpool);
 static const char *tagname(const Tag &t);
-static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers);
+static void dump_tags(const Tagpool &tagpool, hidx_t ttran, uint32_t tvers);
 
 
-dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n)
-       : debug(pool.opts->dump_dfa_raw)
-       , dfa(d)
-       , tagpool(pool)
+dump_dfa_t::dump_dfa_t(const opt_t *opts)
+       : debug(opts->dump_dfa_raw)
        , uniqidx(0)
-       , base(n.states)
-       , shadow(NULL)
 {
        if (!debug) return;
 
-       shadow = new closure_t;
        fprintf(stderr, "digraph DFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=plaintext fontname=fixed]\n"
-               "  edge[arrowhead=vee fontname=fixed]\n\n");
+               "  node[shape=plaintext fontname=Courier]\n"
+               "  edge[arrowhead=vee fontname=Courier]\n\n");
 }
 
 
@@ -45,17 +41,10 @@ dump_dfa_t::~dump_dfa_t()
 {
        if (!debug) return;
 
-       delete shadow;
        fprintf(stderr, "}\n");
 }
 
 
-uint32_t dump_dfa_t::index(const nfa_state_t *s) const
-{
-       return static_cast<uint32_t>(s - base);
-}
-
-
 static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
 {
        if (i == HROOT) {
@@ -77,146 +66,107 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i)
 }
 
 
-void dump_dfa_t::closure_tags(cclositer_t c)
+void dump_dfa_t::state(const determ_context_t &ctx, bool isnew)
 {
        if (!debug) return;
-       if (c->tvers == ZERO_TAGS) return;
-
-       const hidx_t l = c->tlook;
-       const tagver_t *vers = tagpool[c->tvers];
-       const size_t ntag = tagpool.ntags;
-
-       for (size_t t = 0; t < ntag; ++t) {
-               fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
-       }
-
-       if (l != HROOT) {
-               dump_history(dfa, tagpool.history, l);
-       }
-}
 
+       const closure_t &closure = ctx.dc_closure;
+       cclositer_t b = closure.begin(), e = closure.end(), c;
+       const uint32_t origin = ctx.dc_origin;
+       const uint32_t target = ctx.dc_target;
+       const uint32_t symbol = ctx.dc_symbol;
+       const dfa_t &dfa = ctx.dc_dfa;
+       const Tagpool &tagpool = ctx.dc_tagpool;
+       uint32_t i;
 
-void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew)
-{
-       if (!debug) return;
+       if (target == dfa_t::NIL) return;
 
-       cclositer_t c1 = clos.begin(), c2 = clos.end(), c,
-               s1 = shadow->begin(), s2 = shadow->end(), s;
-       const char
-               *style = isnew ? "" : " STYLE=\"dotted\"",
-               *color = " COLOR=\"lightgray\"";
-       uint32_t i;
+       const uint32_t state = isnew ? target : ++uniqidx;
+       const char *prefix = isnew ? "" : "i";
+       const char *style = isnew ? "" : " STYLE=\"dotted\"";
 
+       // closure
        fprintf(stderr, "  %s%u [label=<<TABLE"
                " BORDER=\"0\""
                " CELLBORDER=\"1\""
-               ">", isnew ? "" : "i", state);
-
+               ">", prefix, state);
        i = 0;
-       for (s = s1; s != s2; ++s, ++i) {
-               fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"_%u_%u\"%s%s><FONT%s>%u",
-                       i, i, color, style, color, index(s->state));
-               closure_tags(s);
-               fprintf(stderr, "</FONT></TD></TR>");
-       }
-       if (!shadow->empty()) {
-               fprintf(stderr, "<TR><TD BORDER=\"0\"></TD></TR>");
-       }
-       i = 0;
-       for (c = c1; c != c2; ++c, ++i) {
+       for (c = b; c != e; ++c, ++i) {
                fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
-                       i, style, index(c->state));
-               closure_tags(c);
+                       i, style, static_cast<uint32_t>(c->state - ctx.dc_nfa.states));
+
+               if (c->tvers != ZERO_TAGS) {
+                       const tagver_t *vers = tagpool[c->tvers];
+                       const size_t ntag = dfa.tags.size();
+
+                       for (size_t t = 0; t < ntag; ++t) {
+                               fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
+                       }
+
+                       if (c->tlook != HROOT) {
+                               dump_history(dfa, ctx.dc_tagtrie, c->tlook);
+                       }
+               }
+
                fprintf(stderr, "</TD></TR>");
        }
        fprintf(stderr, "</TABLE>>]\n");
-}
 
+       // transitions (initial state)
+       if (origin == dfa_t::NIL) {
+               fprintf(stderr, "  void [shape=point]\n");
 
-void dump_dfa_t::state0(const closure_t &clos)
-{
-       if (!debug) return;
-
-       uint32_t i;
-       closure(clos, 0, true);
-       fprintf(stderr, "  void [shape=point]\n");
-       i = 0;
-       for (cclositer_t c = shadow->begin(); c != shadow->end(); ++c, ++i) {
-               fprintf(stderr, "  void -> 0:_%u_%u:w [style=dotted color=lightgray fontcolor=lightgray label=\"",
-                       i, i);
-               dump_tags(tagpool, c->ttran, c->tvers);
-               fprintf(stderr, "\"]\n");
-       }
-       i = 0;
-       for (cclositer_t c = clos.begin(); c != clos.end(); ++c, ++i) {
-               fprintf(stderr, "  void -> 0:%u:w [style=dotted label=\"", i);
-               dump_tags(tagpool, c->ttran, c->tvers);
-               fprintf(stderr, "\"]\n");
+               uint32_t i = 0;
+               for (c = b; c != e; ++c, ++i) {
+                       fprintf(stderr, "  void -> 0:%u:w [style=dotted label=\"", i);
+                       dump_tags(tagpool, c->ttran, c->tvers);
+                       fprintf(stderr, "\"]\n");
+               }
        }
-}
 
+       // transitions (other states)
+       else {
+               if (!isnew) {
+                       fprintf(stderr,
+                               "  i%u [style=dotted]\n"
+                               "  i%u:s -> %u:s [style=dotted label=\"",
+                               state, state, origin);
+                       dump_tcmd(dfa.states[origin]->tcmd[symbol]);
+                       fprintf(stderr, "\"]\n");
+               }
 
-void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew)
-{
-       if (!debug) return;
+               uint32_t i = 0;
+               for (c = b; c != e; ++c, ++i) {
+                       fprintf(stderr,
+                               "  %u:%u:e -> %s%u:%u:w [label=\"%u",
+                               origin, c->origin, prefix, state, i, symbol);
+                       dump_tags(tagpool, c->ttran, c->tvers);
+                       fprintf(stderr, "\"]\n");
+               }
+       }
 
-       const dfa_state_t *s = dfa.states[state];
-       const size_t state2 = s->arcs[symbol];
+       // if final state, dump finalizer
+       const dfa_state_t *t = dfa.states[target];
+       if (t->rule != Rule::NONE) {
+               const Rule &r = dfa.rules[t->rule];
+               const tcmd_t *cmd = t->tcmd[dfa.nchars];
 
-       if (state2 == dfa_t::NIL) return;
+               // see note [at most one final item per closure]
+               c = std::find_if(b, e, clos_t::fin);
+               assert(c != e);
 
-       const tcmd_t *cmd = s->tcmd[symbol];
-       const uint32_t
-               a = static_cast<uint32_t>(symbol),
-               x = static_cast<uint32_t>(state),
-               y = static_cast<uint32_t>(state2),
-               z = isnew ? y : ++uniqidx;
-       const char *prefix = isnew ? "" : "i";
-       uint32_t i;
+               fprintf(stderr, "  r%u [shape=none label=\"(", state);
+               for (size_t t = r.ltag; t < r.htag; ++t) {
+                       if (t > r.ltag) fprintf(stderr, " ");
+                       fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t]));
+               }
+               fprintf(stderr, ")\"]\n");
 
-       closure(clos, z, isnew);
-       if (!isnew) {
-               fprintf(stderr, "  i%u [style=dotted]\n"
-                       "  i%u:s -> %u:s [style=dotted label=\"", z, z, y);
+               fprintf(stderr, "  %u:%u:e -> r%u [style=dotted label=\"",
+                       state, c->origin, state);
                dump_tcmd(cmd);
                fprintf(stderr, "\"]\n");
        }
-       i = 0;
-       for (cclositer_t b = shadow->begin(), c = b; c != shadow->end(); ++c, ++i) {
-               fprintf(stderr, "  %u:%u:e -> %s%u:_%u_%u:w [color=lightgray fontcolor=lightgray label=\"%u",
-                       x, c->origin, prefix, z, i, i, a);
-               dump_tags(tagpool, c->ttran, c->tvers);
-               fprintf(stderr, "\"]\n");
-       }
-       i = 0;
-       for (cclositer_t c = clos.begin(); c != clos.end(); ++c, ++i) {
-               fprintf(stderr, "  %u:%u:e -> %s%u:%u:w [label=\"%u",
-                       x, c->origin, prefix, z, i, a);
-               dump_tags(tagpool, c->ttran, c->tvers);
-               fprintf(stderr, "\"]\n");
-       }
-}
-
-
-void dump_dfa_t::final(size_t state, const nfa_state_t *port)
-{
-       if (!debug) return;
-
-       const dfa_state_t *s = dfa.states[state];
-       const Rule &r = dfa.rules[s->rule];
-       const tcmd_t *cmd = s->tcmd[dfa.nchars];
-       const uint32_t x = static_cast<uint32_t>(state);
-
-       fprintf(stderr, "  r%u [shape=none label=\"(", x);
-       for (size_t t = r.ltag; t < r.htag; ++t) {
-               if (t > r.ltag) fprintf(stderr, " ");
-               fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t]));
-       }
-       fprintf(stderr, ")\"]\n");
-
-       fprintf(stderr, "  %u:%u:e -> r%u [style=dotted label=\"", x, index(port), x);
-       dump_tcmd(cmd);
-       fprintf(stderr, "\"]\n");
 }
 
 
@@ -229,8 +179,8 @@ void dump_dfa(const dfa_t &dfa)
        fprintf(stderr,
                "digraph DFA {\n"
                "  rankdir=LR\n"
-               "  node[shape=Mrecord fontname=fixed]\n"
-               "  edge[arrowhead=vee fontname=fixed]\n\n");
+               "  node[shape=Mrecord fontname=Courier]\n"
+               "  edge[arrowhead=vee fontname=Courier]\n\n");
 
        // initializer
        fprintf(stderr,
@@ -321,7 +271,7 @@ const char *tagname(const Tag &t)
 }
 
 
-void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers)
+void dump_tags(const Tagpool &tagpool, hidx_t ttran, uint32_t tvers)
 {
        if (ttran == HROOT) return;
 
index 6a7980a46eeb2afef8a34bc9736516a27a8dae80..52a41d9e92b73ec5b53d98298426713b3de4ebab 100644 (file)
@@ -4,38 +4,22 @@
 #include <stddef.h>
 #include "src/util/c99_stdint.h"
 
-#include "src/dfa/closure.h"
-#include "src/dfa/dfa.h"
-#include "src/util/forbid_copy.h"
-
 namespace re2c
 {
 
-struct Tagpool;
+struct determ_context_t;
 struct dfa_t;
-struct nfa_state_t;
-struct nfa_t;
+struct opt_t;
 struct tcmd_t;
-struct kernels_t;
 
 struct dump_dfa_t
 {
        const bool debug;
-       const dfa_t &dfa;
-       const Tagpool &tagpool;
        uint32_t uniqidx;
-       const nfa_state_t *base;
-       closure_t *shadow;
 
-       dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n);
+       explicit dump_dfa_t(const opt_t *);
        ~dump_dfa_t();
-       void closure_tags(cclositer_t c);
-       void closure(const closure_t &clos, uint32_t state, bool isnew);
-       void state0(const closure_t &clos);
-       void state(const closure_t &clos, size_t state, size_t symbol, bool isnew);
-       void final(size_t state, const nfa_state_t *port);
-       uint32_t index(const nfa_state_t *s) const;
-       FORBID_COPY(dump_dfa_t);
+       void state(const determ_context_t &, bool);
 };
 
 void dump_dfa(const dfa_t &dfa);
index b60062cd4615a812b49c94e47fe8ac752a79af54..7e54476d85228b30ec1dc33967ae7ba18dfd7bf1 100644 (file)
@@ -4,6 +4,7 @@
 #include <algorithm>
 #include <valarray>
 
+#include "src/dfa/determinization.h"
 #include "src/dfa/dfa.h"
 #include "src/dfa/dump.h"
 #include "src/dfa/find_state.h"
@@ -79,18 +80,26 @@ namespace re2c
 
 struct kernel_eq_t
 {
-       Tagpool &tagpool;
-       const std::vector<Tag> &tags;
-
+       const determ_context_t &ctx;
        bool operator()(const kernel_t *, const kernel_t *) const;
 };
 
 
-static void reserve_buffers(kernel_buffers_t &, allocator_t &, tagver_t, size_t);
+struct kernel_map_t
+{
+       determ_context_t &ctx;
+       bool operator()(const kernel_t *, const kernel_t *);
+};
+
+
 static kernel_t *make_new_kernel(size_t, allocator_t &);
 static kernel_t *make_kernel_copy(const kernel_t *, allocator_t &);
-static uint32_t hash_kernel(const kernel_t *kernel);
 static void copy_to_buffer_kernel(const closure_t &, const prectable_t *, kernel_t *);
+static void reserve_buffers(determ_context_t &);
+static uint32_t hash_kernel(const kernel_t *kernel);
+static bool equal_lookahead_tags(const kernel_t *, const kernel_t *, const determ_context_t &);
+static bool do_find_state(determ_context_t &ctx);
+static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin);
 
 
 kernel_buffers_t::kernel_buffers_t(allocator_t &alc)
@@ -107,16 +116,6 @@ kernel_buffers_t::kernel_buffers_t(allocator_t &alc)
 {}
 
 
-kernels_t::kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector<Tag> &ts)
-       : lookup()
-       , tagpool(tagp)
-       , tcpool(tcp)
-       , tags(ts)
-       , buffers(tagp.alc)
-       , pacts(NULL)
-{}
-
-
 kernel_t *make_new_kernel(size_t size, allocator_t &alc)
 {
        kernel_t *k = alc.alloct<kernel_t>(1);
@@ -150,11 +149,15 @@ kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc)
 }
 
 
-void reserve_buffers(kernel_buffers_t &kbufs, allocator_t &alc,
-       tagver_t maxver, size_t maxkern)
+void reserve_buffers(determ_context_t &ctx)
 {
-       if (kbufs.maxsize < maxkern) {
-               kbufs.maxsize = maxkern * 2; // in advance
+       kernel_buffers_t &kbufs = ctx.dc_buffers;
+       allocator_t &alc = ctx.dc_allocator;
+       const tagver_t maxver = ctx.dc_dfa.maxtagver;
+       const size_t nkern = ctx.dc_closure.size();
+
+       if (kbufs.maxsize < nkern) {
+               kbufs.maxsize = nkern * 2; // in advance
                kbufs.kernel = make_new_kernel(kbufs.maxsize, alc);
        }
 
@@ -225,25 +228,31 @@ void copy_to_buffer_kernel(const closure_t &closure,
 }
 
 
-static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y,
-       Tagpool &tagpool, const std::vector<Tag> &tags)
+bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, const determ_context_t &ctx)
 {
+       assert(x->size == y->size);
+
        if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
                return true;
        }
-       tagtree_t &h = tagpool.history;
+
+       tagtree_t &trie = ctx.dc_tagtrie;
+       const Tagpool &tagpool = ctx.dc_tagpool;
+       const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+
        for (size_t i = 0; i < x->size; ++i) {
                const hidx_t xl = x->tlook[i], yl = y->tlook[i];
                for (size_t t = 0; t < tagpool.ntags; ++t) {
                        if (history(tags[t])) {
                                // compare full tag sequences
-                               if (h.compare_reversed(xl, yl, t) != 0) return false;
+                               if (trie.compare_reversed(xl, yl, t) != 0) return false;
                        } else {
                                // compare only the last pair of tags
-                               if (h.last(xl, t) != h.last(yl, t)) return false;
+                               if (trie.last(xl, t) != trie.last(yl, t)) return false;
                        }
                }
        }
+
        return true;
 }
 
@@ -257,11 +266,11 @@ bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const
                && memcmp(x->state, y->state, n * sizeof(void*)) == 0
                && memcmp(x->tvers, y->tvers, n * sizeof(size_t)) == 0
                && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
-               && equal_lookahead_tags(x, y, tagpool, tags);
+               && equal_lookahead_tags(x, y, ctx);
 }
 
 
-bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
+bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y)
 {
        // check that kernel sizes, NFA states, lookahead tags
        // and precedence table coincide (versions might differ)
@@ -269,26 +278,28 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
        const bool compatible = n == y->size
                && memcmp(x->state, y->state, n * sizeof(void*)) == 0
                && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0)
-               && equal_lookahead_tags(x, y, tagpool, tags);
+               && equal_lookahead_tags(x, y, ctx);
        if (!compatible) return false;
 
-       tagver_t *x2y = buffers.x2y, *y2x = buffers.y2x, max = buffers.max;
-       size_t *x2t = buffers.x2t;
+       const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+       const size_t ntag = tags.size();
+       kernel_buffers_t &bufs = ctx.dc_buffers;
+       tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max;
+       size_t *x2t = bufs.x2t;
 
        // map tag versions of one kernel to that of another
        // and check that lookahead versions (if any) coincide
-       const size_t ntag = tagpool.ntags;
        std::fill(x2y - max, x2y + max, TAGVER_ZERO);
        std::fill(y2x - max, y2x + max, TAGVER_ZERO);
        for (size_t i = 0; i < n; ++i) {
                const tagver_t
-                       *xvs = tagpool[x->tvers[i]],
-                       *yvs = tagpool[y->tvers[i]];
+                       *xvs = ctx.dc_tagpool[x->tvers[i]],
+                       *yvs = ctx.dc_tagpool[y->tvers[i]];
                const hidx_t xl = x->tlook[i];
 
                for (size_t t = 0; t < ntag; ++t) {
                        // see note [mapping ignores items with lookahead tags]
-                       if (tagpool.history.last(xl, t) != TAGVER_ZERO
+                       if (ctx.dc_tagtrie.last(xl, t) != TAGVER_ZERO
                                && !history(tags[t])) continue;
 
                        const tagver_t xv = xvs[t], yv = yvs[t];
@@ -305,7 +316,8 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
        }
 
        // we have bijective mapping; now try to create list of commands
-       tcmd_t *b1 = buffers.backup_actions, *b2 = b1, *a, **pa, *copy = NULL;
+       tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL;
+       tcmd_t *b1 = bufs.backup_actions, *b2 = b1;
 
        // backup 'save' commands: if topsort finds cycles, this mapping
        // will be rejected and we'll have to revert all changes
@@ -328,7 +340,7 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
                const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv);
                if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) {
                        assert(axv != ayv);
-                       copy = tcpool.make_copy(copy, axv, ayv);
+                       copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv);
                }
        }
 
@@ -338,7 +350,7 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
        *pacts = copy;
 
        // see note [topological ordering of copy commands]
-       const bool nontrivial_cycles = tcmd_t::topsort(pacts, buffers.indegree);
+       const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree);
 
        // in case of cycles restore 'save' commands and fail
        if (nontrivial_cycles) {
@@ -351,59 +363,59 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y)
 }
 
 
-size_t kernels_t::insert(const closure_t &closure, tagver_t maxver,
-       const prectable_t *prectbl, tcmd_t *&acts, bool &is_new)
+bool do_find_state(determ_context_t &ctx)
 {
-       const size_t nkern = closure.size();
-       size_t x = dfa_t::NIL;
-       is_new = false;
+       kernels_t &kernels = ctx.dc_kernels;
+       const closure_t &closure = ctx.dc_closure;
 
        // empty closure corresponds to default state
-       if (nkern == 0) {
-               acts = NULL;
-               return x;
+       if (closure.size() == 0) {
+               ctx.dc_target = dfa_t::NIL;
+               ctx.dc_actions = NULL;
+               return false;
        }
 
        // resize buffer if closure is too large
-       reserve_buffers(buffers, tagpool.alc, maxver, nkern);
-       kernel_t *k = buffers.kernel;
+       reserve_buffers(ctx);
+       kernel_t *k = ctx.dc_buffers.kernel;
 
        // copy closure to buffer kernel
-       copy_to_buffer_kernel(closure, prectbl, k);
+       copy_to_buffer_kernel(closure, ctx.dc_prectbl, k);
 
        // hash "static" part of the kernel
        const uint32_t hash = hash_kernel(k);
 
        // try to find identical kernel
-       kernel_eq_t cmp_eq = {tagpool, tags};
-       x = lookup.find_with(hash, k, cmp_eq);
-       if (x != index_t::NIL) return x;
+       kernel_eq_t cmp_eq = {ctx};
+       ctx.dc_target = kernels.find_with(hash, k, cmp_eq);
+       if (ctx.dc_target != kernels_t::NIL) return false;
 
        // else try to find mappable kernel
        // see note [bijective mappings]
-       this->pacts = &acts;
-       x = lookup.find_with(hash, k, *this);
-       if (x != index_t::NIL) return x;
+       kernel_map_t cmp_map = {ctx};
+       ctx.dc_target = kernels.find_with(hash, k, cmp_map);
+       if (ctx.dc_target != kernels_t::NIL) return false;
 
        // otherwise add new kernel
-       x = lookup.push(hash, make_kernel_copy(k, tagpool.alc));
-       is_new = true;
-       return x;
+       kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator);
+       ctx.dc_target = kernels.push(hash, kcopy);
+       return true;
 }
 
 
-static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
-       dfa_t &dfa, const Tagpool &tagpool, const std::vector<Tag> &tags)
+tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin)
 {
+       dfa_t &dfa = ctx.dc_dfa;
+       const Rule &rule = dfa.rules[fin.state->rule];
+       const tagver_t *vers = ctx.dc_tagpool[fin.tvers];
+       const hidx_t look = fin.tlook;
+       const tagtree_t &hist = ctx.dc_tagtrie;
        tcpool_t &tcpool = dfa.tcpool;
-       const Rule &rule = dfa.rules[ridx];
-       const tagver_t *vers = tagpool[clos.tvers];
-       const tagtree_t &hist = tagpool.history;
-       const hidx_t look = clos.tlook;
        tcmd_t *copy = NULL, *save = NULL, **p;
 
        for (size_t t = rule.ltag; t < rule.htag; ++t) {
-               const Tag &tag = tags[t];
+
+               const Tag &tag = dfa.tags[t];
                if (fixed(tag)) continue;
 
                const tagver_t v = abs(vers[t]), l = hist.last(look, t);
@@ -425,11 +437,12 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx,
 }
 
 
-void find_state(dfa_t &dfa, size_t origin, size_t symbol, kernels_t &kernels,
-       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const prectable_t *prectbl)
+void find_state(determ_context_t &ctx)
 {
-       bool is_new;
-       const size_t state = kernels.insert(closure, dfa.maxtagver, prectbl, acts, is_new);
+       dfa_t &dfa = ctx.dc_dfa;
+
+       // find or add the new state in the existing set of states
+       const bool is_new = do_find_state(ctx);
 
        if (is_new) {
                // create new DFA state
@@ -438,25 +451,27 @@ void find_state(dfa_t &dfa, size_t origin, size_t symbol, kernels_t &kernels,
 
                // check if the new state is final
                // see note [at most one final item per closure]
-               cclositer_t c1 = closure.begin(), c2 = closure.end(),
-                       c = std::find_if(c1, c2, clos_t::fin);
-               if (c != c2) {
-                       t->rule = c->state->rule;
-                       t->tcmd[dfa.nchars] = finalizer(*c, t->rule, dfa,
-                               kernels.tagpool, kernels.tags);
-                       dump.final(state, c->state);
+               cclositer_t
+                       b = ctx.dc_closure.begin(),
+                       e = ctx.dc_closure.end(),
+                       f = std::find_if(b, e, clos_t::fin);
+               if (f != e) {
+                       t->tcmd[dfa.nchars] = final_actions(ctx, *f);
+                       t->rule = f->state->rule;
                }
        }
 
-       if (origin == dfa_t::NIL) { // initial state
-               dfa.tcmd0 = acts;
-               dump.state0(closure);
-       } else {
-               dfa_state_t *s = dfa.states[origin];
-               s->arcs[symbol] = state;
-               s->tcmd[symbol] = acts;
-               dump.state(closure, origin, symbol, is_new);
+       if (ctx.dc_origin == dfa_t::NIL) {
+               // initial state
+               dfa.tcmd0 = ctx.dc_actions;
        }
+       else {
+               dfa_state_t *s = dfa.states[ctx.dc_origin];
+               s->arcs[ctx.dc_symbol] = ctx.dc_target;
+               s->tcmd[ctx.dc_symbol] = ctx.dc_actions;
+       }
+
+       ctx.dc_dump.state(ctx, is_new);
 }
 
 } // namespace re2c
index 913088f18655c38ad80a6b71b0c196b2db175656..5f65d19ba6b924a6e1635f01be77c602cd8b4295 100644 (file)
@@ -49,32 +49,7 @@ struct kernel_buffers_t
        explicit kernel_buffers_t(allocator_t &alc);
 };
 
-struct kernels_t
-{
-       typedef lookup_t<const kernel_t*> index_t;
-       index_t lookup;
-
-public:
-       Tagpool &tagpool;
-       tcpool_t &tcpool;
-       const std::vector<Tag> &tags;
-
-private:
-       kernel_buffers_t buffers;
-
-       tcmd_t **pacts;
-
-public:
-       kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector<Tag> &ts);
-       inline size_t size() const { return lookup.size(); }
-       inline const kernel_t* operator[](size_t idx) const { return lookup[idx]; }
-       size_t insert(const closure_t &clos, tagver_t maxver, const prectable_t *prectbl, tcmd_t *&acts, bool &is_new);
-       bool operator()(const kernel_t *k1, const kernel_t *k2);
-       FORBID_COPY(kernels_t);
-};
-
-void find_state(dfa_t &dfa, size_t state, size_t symbol, kernels_t &kernels,
-       const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const prectable_t *prectbl);
+typedef lookup_t<const kernel_t*> kernels_t;
 
 } // namespace re2c
 
index bf056f98165b36206159e56c8bf43d3370d58234..07ec71e8c07d995a3c5ac904e1279da173bbdfc4 100644 (file)
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 
 #include "src/dfa/closure.h"
+#include "src/dfa/determinization.h"
 #include "src/dfa/tagtree.h"
 
 namespace re2c
@@ -61,7 +62,7 @@ int32_t tagtree_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const
 
 
 static void reconstruct_history(const tagtree_t &history,
-       std::vector<tag_info_t> &path, hidx_t idx)
+       tag_path_t &path, hidx_t idx)
 {
        path.clear();
        for (; idx != HROOT; idx = history.pred(idx)) {
@@ -84,9 +85,8 @@ static inline int32_t unpack_leftmost(int32_t value)
 }
 
 
-int32_t tagtree_t::precedence(const clos_t &x, const clos_t &y,
-       int32_t &rhox, int32_t &rhoy, const prectable_t *prectbl,
-       const std::vector<Tag> &tags, size_t nclos)
+int32_t precedence(determ_context_t &ctx,
+       const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy)
 {
        const hidx_t xl = x.tlook, yl = y.tlook;
        const uint32_t xo = x.origin, yo = y.origin;
@@ -96,23 +96,36 @@ int32_t tagtree_t::precedence(const clos_t &x, const clos_t &y,
                return 0;
        }
 
-       reconstruct_history(*this, path1, xl);
-       reconstruct_history(*this, path2, yl);
-       std::vector<tag_info_t>::const_reverse_iterator
-               i1 = path1.rbegin(), e1 = path1.rend(), j1 = i1, g1,
-               i2 = path2.rbegin(), e2 = path2.rend(), j2 = i2, g2;
+       tagtree_t &trie = ctx.dc_tagtrie;
+       tag_path_t &p1 = trie.path1, &p2 = trie.path2;
+       reconstruct_history(trie, p1, xl);
+       reconstruct_history(trie, p2, yl);
+       tag_path_t::const_reverse_iterator
+               i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1,
+               i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2;
+
+       const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+       size_t nclos = 0;
+       const prectable_t *prectbl = NULL;
        const bool fork_frame = xo == yo;
 
-       // find fork
        if (fork_frame) {
+               // find fork
                for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2);
        }
+       else {
+               // get precedence table and size of the origin state
+               const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
+               nclos = k->size;
+               prectbl = k->prectbl;
+       }
 
        // longest precedence
        if (!fork_frame) {
                rhox = unpack_longest(prectbl[xo * nclos + yo]);
                rhoy = unpack_longest(prectbl[yo * nclos + xo]);
-       } else {
+       }
+       else {
                rhox = rhoy = std::numeric_limits<int>::max();
                if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height;
        }
index 921717d2531dc38b2f4dcdfcacd3ba5ae19e7853..f13bd120fe4ba04a5104dfcbd11517df46fdf6d7 100644 (file)
@@ -18,6 +18,8 @@ struct clos_t;
 
 static const hidx_t HROOT = ~0u;
 
+typedef std::vector<tag_info_t> tag_path_t;
+
 struct tagtree_t
 {
        // the whole tree of tags found by the epsilon-closure
@@ -29,8 +31,8 @@ struct tagtree_t
        std::vector<node_t> nodes;
 
        // reconstruct paths for comparison
-       std::vector<tag_info_t> path1;
-       std::vector<tag_info_t> path2;
+       tag_path_t path1;
+       tag_path_t path2;
 
        tagtree_t();
        hidx_t pred(hidx_t i) const;
@@ -40,8 +42,6 @@ struct tagtree_t
        hidx_t push(hidx_t i, tag_info_t info);
        tagver_t last(hidx_t i, size_t t) const;
        int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const;
-       int32_t precedence(const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy,
-               const prectable_t *prectbl, const std::vector<Tag> &tags, size_t nclos);
 
        FORBID_COPY(tagtree_t);
 };