const size_t nstates = dfa.states.size();
const size_t nchars = dfa.nchars;
- State **i2s = new State*[nstates + 1];
+ State **i2s = new State*[nstates];
for (size_t i = 0; i < nstates; ++i)
{
i2s[i] = dfa.states[i] ? new State : NULL;
}
- i2s[nstates] = NULL;
State **p = &head;
for (size_t i = 0; i < nstates; ++i)
{
const size_t to = t->arcs[c];
for (;++c < nchars && t->arcs[c] == to;);
- s->go.span[j].to = i2s[to];
+ s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to];
s->go.span[j].ub = charset[c];
}
s->go.nSpans = j;
#include <algorithm>
#include <assert.h>
+#include <limits>
#include <list>
#include <set>
#include <string.h>
namespace re2c
{
+const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
+
/*
* note [marking DFA states]
*
, nchars(charset.size() - 1) // (n + 1) bounds for n ranges
{
std::map<uintptr_t, std::list<size_t> > kernels;
- nfa_state_t **work = new nfa_state_t* [nfa.size];
- std::vector<nfa_state_t*> *go = new std::vector<nfa_state_t*>[nchars];
+ nfa_state_t **kernel = new nfa_state_t*[nfa.size];
+ std::vector<nfa_state_t*> *arcs = new std::vector<nfa_state_t*>[nchars];
- findState(work, closure(work, nfa.root), states, kernels);
- for (size_t k = 0; k < states.size(); ++k)
+ findState(kernel, closure(kernel, nfa.root), states, kernels);
+ for (size_t n = 0; n < states.size(); ++n)
{
- dfa_state_t *s = states[k];
+ dfa_state_t *s = states[n];
for(size_t i = 0; i < nchars; ++i)
{
- go[i].clear();
+ arcs[i].clear();
}
- s->rule = NULL;
for (size_t k = 0; k < s->kCount; ++k)
{
nfa_state_t *n = s->kernel[k];
for (; charset[j] != r->lower(); ++j);
for (; charset[j] != r->upper(); ++j)
{
- go[j].push_back(n2);
+ arcs[j].push_back(n2);
}
}
break;
s->arcs = new size_t[nchars];
for(size_t i = 0; i < nchars; ++i)
{
- if(!go[i].empty())
+ if(arcs[i].empty())
{
- nfa_state_t **cP = work;
- for (std::vector<nfa_state_t*>::const_iterator j = go[i].begin(); j != go[i].end(); ++j)
- {
- cP = closure(cP, *j);
- }
- s->arcs[i] = findState(work, cP, states, kernels);
+ s->arcs[i] = NIL;
}
else
{
- s->arcs[i] = ~0u;
- }
- }
- }
- delete [] work;
- delete [] go;
-
- const size_t count = states.size();
- for (size_t i = 0; i < count; ++i)
- {
- for (size_t c = 0; c < nchars; ++c)
- {
- if (states[i]->arcs[c] == ~0u)
- {
- states[i]->arcs[c] = count;
+ nfa_state_t **end = kernel;
+ for (std::vector<nfa_state_t*>::const_iterator j = arcs[i].begin(); j != arcs[i].end(); ++j)
+ {
+ end = closure(end, *j);
+ }
+ s->arcs[i] = findState(kernel, end, states, kernels);
}
}
}
+ delete[] kernel;
+ delete[] arcs;
}
dfa_t::~dfa_t()
TABLE,
MOORE
};
+ static const size_t NIL;
std::vector<dfa_state_t*> states;
const size_t nchars;
{
const size_t count = states.size();
- size_t *part = new size_t[count + 1];
+ size_t *part = new size_t[count];
switch (opts->dfa_minimization)
{
size_t *arcs = states[i]->arcs;
for (size_t c = 0; c < nchars; ++c)
{
- arcs[c] = part[arcs[c]];
+ if (arcs[c] != NIL)
+ {
+ arcs[c] = part[arcs[c]];
+ }
}
}
else
{
const size_t count = states.size();
- bool **tbl = new bool*[count + 1];
- tbl[0] = new bool[count * (count + 1) / 2];
- for (size_t i = 0; i < count; ++i)
+ bool **tbl = new bool*[count];
+ tbl[0] = new bool[count * (count - 1) / 2];
+ for (size_t i = 0; i < count - 1; ++i)
{
tbl[i + 1] = tbl[i] + i;
}
tbl[i][j] = s1->ctx != s2->ctx
|| s1->rule != s2->rule;
}
- tbl[count][i] = true;
}
for (bool loop = true; loop;)
{
std::swap(oi, oj);
}
- if (oi != oj && tbl[oi][oj])
+ if (oi != oj && (oi == NIL || oj == NIL || tbl[oi][oj]))
{
tbl[i][j] = true;
loop = true;
}
}
- for (size_t i = 0; i <= count; ++i)
+ for (size_t i = 0; i < count; ++i)
{
part[i] = i;
for (size_t j = 0; j < i; ++j)
if (init.insert(std::make_pair(key, i)).second)
{
part[i] = i;
- next[i] = count;
+ next[i] = NIL;
}
else
{
next[j] = i;
}
}
- part[count] = count;
size_t *out = new size_t[nchars * count];
size_t *diff = new size_t[count];
loop = false;
for (size_t i = 0; i < count; ++i)
{
- if (i != part[i] || next[i] == count)
+ if (i != part[i] || next[i] == NIL)
{
continue;
}
- for (size_t j = i; j != count; j = next[j])
+ for (size_t j = i; j != NIL; j = next[j])
{
size_t *o = &out[j * nchars];
size_t *a = states[j]->arcs;
for (size_t c = 0; c < nchars; ++c)
{
- o[c] = part[a[c]];
+ o[c] = a[c] == NIL ? NIL : part[a[c]];
}
}
size_t diff_count = 0;
- for (size_t j = i; j != count;)
+ for (size_t j = i; j != NIL;)
{
const size_t j_next = next[j];
size_t n = 0;
{
diff[diff_count++] = j;
part[j] = j;
- next[j] = count;
+ next[j] = NIL;
}
j = j_next;
}
, const std::string &dfa_cond
, uint32_t dfa_line
)
- // +1 for default DFA state (NULL)
: name (dfa_name)
, cond (dfa_cond)
, line (dfa_line)
- , nodes_count (dfa.states.size() + 1) // +1 for default state
- , nodes (new Node [nodes_count])
+ , nodes_count (dfa.states.size())
+ , nodes (new Node [nodes_count + 1]) // +1 for default state
, sizeof_key (4)
, rules (rs)
{
const size_t nc = cs.size() - 1;
// initialize skeleton nodes
- for (size_t i = 0; i < nodes_count - 1; ++i)
+ Node *nil = &nodes[nodes_count];
+ for (size_t i = 0; i < nodes_count; ++i)
{
dfa_state_t *s = dfa.states[i];
- std::vector<std::pair<Node*, uint32_t> > a;
+ std::vector<std::pair<Node*, uint32_t> > arcs;
for (size_t c = 0; c < nc;)
{
const size_t j = s->arcs[c];
for (;++c < nc && s->arcs[c] == j;);
- a.push_back(std::make_pair(j == ~0u ? &nodes[nodes_count - 1] : &nodes[j], cs[c]));
+ Node *to = j == dfa_t::NIL
+ ? nil
+ : &nodes[j];
+ arcs.push_back(std::make_pair(to, cs[c]));
}
- if (a.size() == 1 && a[0].first == &nodes[nodes_count - 1])
+ // all arcs go to default node => this node is final, drop arcs
+ if (arcs.size() == 1 && arcs[0].first == nil)
{
- a.clear();
+ arcs.clear();
}
- nodes[i].init(s->ctx, s->rule, a);
+ nodes[i].init(s->ctx, s->rule, arcs);
}
- // last node (the one corresponding to default state)
- // needs not to be initialized after construction
// calculate maximal path length, check overflow
nodes->calc_dist ();