granicus.if.org Git - re2c/commitdiff
Explicitly handle default state as special case during DFA construction.
author Ulya Trofimovich <skvadrik@gmail.com>
Tue, 5 Jan 2016 17:15:37 +0000 (17:15 +0000)
committer Ulya Trofimovich <skvadrik@gmail.com>
Tue, 5 Jan 2016 17:15:37 +0000 (17:15 +0000)
re2c/src/ir/adfa/adfa.cc
re2c/src/ir/dfa/determinization.cc
re2c/src/ir/dfa/dfa.h
re2c/src/ir/dfa/minimization.cc
re2c/src/ir/skeleton/skeleton.cc

index 61941864b98038aac0d1f8cc262fb0141272faff..0333e090b51dba92828cad03db189cc224e8a703 100644 (file)
@@ -39,12 +39,11 @@ DFA::DFA
        const size_t nstates = dfa.states.size();
        const size_t nchars = dfa.nchars;
 
-       State **i2s = new State*[nstates + 1];
+       State **i2s = new State*[nstates];
        for (size_t i = 0; i < nstates; ++i)
        {
                i2s[i] = dfa.states[i] ? new State : NULL;
        }
-       i2s[nstates] = NULL;
 
        State **p = &head;
        for (size_t i = 0; i < nstates; ++i)
@@ -66,7 +65,7 @@ DFA::DFA
                        {
                                const size_t to = t->arcs[c];
                                for (;++c < nchars && t->arcs[c] == to;);
-                               s->go.span[j].to = i2s[to];
+                               s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to];
                                s->go.span[j].ub = charset[c];
                        }
                        s->go.nSpans = j;
index 40cc1c31d8926472d0372a0c9cf5fd52c7f981a5..de4d60e10b4386de16fd92a51866f0613c896c3e 100644 (file)
@@ -1,5 +1,6 @@
 #include <algorithm>
 #include <assert.h>
+#include <limits>
 #include <list>
 #include <set>
 #include <string.h>
@@ -14,6 +15,8 @@
 namespace re2c
 {
 
+const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
+
 /*
  * note [marking DFA states]
  *
@@ -122,20 +125,19 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
        , nchars(charset.size() - 1) // (n + 1) bounds for n ranges
 {
        std::map<uintptr_t, std::list<size_t> > kernels;
-       nfa_state_t **work = new nfa_state_t* [nfa.size];
-       std::vector<nfa_state_t*> *go = new std::vector<nfa_state_t*>[nchars];
+       nfa_state_t **kernel = new nfa_state_t*[nfa.size];
+       std::vector<nfa_state_t*> *arcs = new std::vector<nfa_state_t*>[nchars];
 
-       findState(work, closure(work, nfa.root), states, kernels);
-       for (size_t k = 0; k < states.size(); ++k)
+       findState(kernel, closure(kernel, nfa.root), states, kernels);
+       for (size_t n = 0; n < states.size(); ++n)
        {
-               dfa_state_t *s = states[k];
+               dfa_state_t *s = states[n];
 
                for(size_t i = 0; i < nchars; ++i)
                {
-                       go[i].clear();
+                       arcs[i].clear();
                }
 
-               s->rule = NULL;
                for (size_t k = 0; k < s->kCount; ++k)
                {
                        nfa_state_t *n = s->kernel[k];
@@ -150,7 +152,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
                                                for (; charset[j] != r->lower(); ++j);
                                                for (; charset[j] != r->upper(); ++j)
                                                {
-                                                       go[j].push_back(n2);
+                                                       arcs[j].push_back(n2);
                                                }
                                        }
                                        break;
@@ -189,35 +191,23 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules)
                s->arcs = new size_t[nchars];
                for(size_t i = 0; i < nchars; ++i)
                {
-                       if(!go[i].empty())
+                       if(arcs[i].empty())
                        {
-                               nfa_state_t **cP = work;
-                               for (std::vector<nfa_state_t*>::const_iterator j = go[i].begin(); j != go[i].end(); ++j)
-                               {
-                                       cP = closure(cP, *j);
-                               }
-                               s->arcs[i] = findState(work, cP, states, kernels);
+                               s->arcs[i] = NIL;
                        }
                        else
                        {
-                               s->arcs[i] = ~0u;
-                       }
-               }
-       }
-       delete [] work;
-       delete [] go;
-
-       const size_t count = states.size();
-       for (size_t i = 0; i < count; ++i)
-       {
-               for (size_t c = 0; c < nchars; ++c)
-               {
-                       if (states[i]->arcs[c] == ~0u)
-                       {
-                               states[i]->arcs[c] = count;
+                               nfa_state_t **end = kernel;
+                               for (std::vector<nfa_state_t*>::const_iterator j = arcs[i].begin(); j != arcs[i].end(); ++j)
+                               {
+                                       end = closure(end, *j);
+                               }
+                               s->arcs[i] = findState(kernel, end, states, kernels);
                        }
                }
        }
+       delete[] kernel;
+       delete[] arcs;
 }
 
 dfa_t::~dfa_t()
index 5b72a139aafdbbdb8e1370ad26da18e1c377b36f..0e53fe4d2325fe6511d06cfbc96ea833e2b8084a 100644 (file)
@@ -46,6 +46,7 @@ struct dfa_t
                TABLE,
                MOORE
        };
+       static const size_t NIL;
 
        std::vector<dfa_state_t*> states;
        const size_t nchars;
index 851b3face27afee761be4286a1e21f9173aad44e..8fdf1389131b55fcb281d9b065552d6860793baa 100644 (file)
@@ -12,7 +12,7 @@ void dfa_t::minimization()
 {
        const size_t count = states.size();
 
-       size_t *part = new size_t[count + 1];
+       size_t *part = new size_t[count];
 
        switch (opts->dfa_minimization)
        {
@@ -27,7 +27,10 @@ void dfa_t::minimization()
                        size_t *arcs = states[i]->arcs;
                        for (size_t c = 0; c < nchars; ++c)
                        {
-                               arcs[c] = part[arcs[c]];
+                               if (arcs[c] != NIL)
+                               {
+                                       arcs[c] = part[arcs[c]];
+                               }
                        }
                }
                else
@@ -58,9 +61,9 @@ void dfa_t::minimization_table(size_t *part)
 {
        const size_t count = states.size();
 
-       bool **tbl = new bool*[count + 1];
-       tbl[0] = new bool[count * (count + 1) / 2];
-       for (size_t i = 0; i < count; ++i)
+       bool **tbl = new bool*[count];
+       tbl[0] = new bool[count * (count - 1) / 2];
+       for (size_t i = 0; i < count - 1; ++i)
        {
                tbl[i + 1] = tbl[i] + i;
        }
@@ -74,7 +77,6 @@ void dfa_t::minimization_table(size_t *part)
                        tbl[i][j] = s1->ctx != s2->ctx
                                || s1->rule != s2->rule;
                }
-               tbl[count][i] = true;
        }
 
        for (bool loop = true; loop;)
@@ -94,7 +96,7 @@ void dfa_t::minimization_table(size_t *part)
                                                {
                                                        std::swap(oi, oj);
                                                }
-                                               if (oi != oj && tbl[oi][oj])
+                                               if (oi != oj && (oi == NIL || oj == NIL || tbl[oi][oj]))
                                                {
                                                        tbl[i][j] = true;
                                                        loop = true;
@@ -106,7 +108,7 @@ void dfa_t::minimization_table(size_t *part)
                }
        }
 
-       for (size_t i = 0; i <= count; ++i)
+       for (size_t i = 0; i < count; ++i)
        {
                part[i] = i;
                for (size_t j = 0; j < i; ++j)
@@ -148,7 +150,7 @@ void dfa_t::minimization_moore(size_t *part)
                if (init.insert(std::make_pair(key, i)).second)
                {
                        part[i] = i;
-                       next[i] = count;
+                       next[i] = NIL;
                }
                else
                {
@@ -158,7 +160,6 @@ void dfa_t::minimization_moore(size_t *part)
                        next[j] = i;
                }
        }
-       part[count] = count;
 
        size_t *out = new size_t[nchars * count];
        size_t *diff = new size_t[count];
@@ -167,23 +168,23 @@ void dfa_t::minimization_moore(size_t *part)
                loop = false;
                for (size_t i = 0; i < count; ++i)
                {
-                       if (i != part[i] || next[i] == count)
+                       if (i != part[i] || next[i] == NIL)
                        {
                                continue;
                        }
 
-                       for (size_t j = i; j != count; j = next[j])
+                       for (size_t j = i; j != NIL; j = next[j])
                        {
                                size_t *o = &out[j * nchars];
                                size_t *a = states[j]->arcs;
                                for (size_t c = 0; c < nchars; ++c)
                                {
-                                       o[c] = part[a[c]];
+                                       o[c] = a[c] == NIL ? NIL : part[a[c]];
                                }
                        }
 
                        size_t diff_count = 0;
-                       for (size_t j = i; j != count;)
+                       for (size_t j = i; j != NIL;)
                        {
                                const size_t j_next = next[j];
                                size_t n = 0;
@@ -202,7 +203,7 @@ void dfa_t::minimization_moore(size_t *part)
                                {
                                        diff[diff_count++] = j;
                                        part[j] = j;
-                                       next[j] = count;
+                                       next[j] = NIL;
                                }
                                j = j_next;
                        }
index 0d9ca54f6203e5fffd55d98736221e5843dacf23..deee1133406fae26b1af18852cfa65bda3668915 100644 (file)
@@ -77,36 +77,38 @@ Skeleton::Skeleton
        , const std::string &dfa_cond
        , uint32_t dfa_line
        )
-       // +1 for default DFA state (NULL)
        : name (dfa_name)
        , cond (dfa_cond)
        , line (dfa_line)
-       , nodes_count (dfa.states.size() + 1) // +1 for default state
-       , nodes (new Node [nodes_count])
+       , nodes_count (dfa.states.size())
+       , nodes (new Node [nodes_count + 1]) // +1 for default state
        , sizeof_key (4)
        , rules (rs)
 {
        const size_t nc = cs.size() - 1;
 
        // initialize skeleton nodes
-       for (size_t i = 0; i < nodes_count - 1; ++i)
+       Node *nil = &nodes[nodes_count];
+       for (size_t i = 0; i < nodes_count; ++i)
        {
                dfa_state_t *s = dfa.states[i];
-               std::vector<std::pair<Node*, uint32_t> > a;
+               std::vector<std::pair<Node*, uint32_t> > arcs;
                for (size_t c = 0; c < nc;)
                {
                        const size_t j = s->arcs[c];
                        for (;++c < nc && s->arcs[c] == j;);
-                       a.push_back(std::make_pair(j == ~0u ? &nodes[nodes_count - 1] : &nodes[j], cs[c]));
+                       Node *to = j == dfa_t::NIL
+                               ? nil
+                               : &nodes[j];
+                       arcs.push_back(std::make_pair(to, cs[c]));
                }
-               if (a.size() == 1 && a[0].first == &nodes[nodes_count - 1])
+               // all arcs go to default node => this node is final, drop arcs
+               if (arcs.size() == 1 && arcs[0].first == nil)
                {
-                       a.clear();
+                       arcs.clear();
                }
-               nodes[i].init(s->ctx, s->rule, a);
+               nodes[i].init(s->ctx, s->rule, arcs);
        }
-       // last node (the one corresponding to default state)
-       // needs not to be initialized after construction
 
        // calculate maximal path length, check overflow
        nodes->calc_dist ();