From: Ulya Trofimovich Date: Tue, 5 Jan 2016 17:15:37 +0000 (+0000) Subject: Explicitly handle default state as special case during DFA construction. X-Git-Tag: 0.16~1^2~14 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cfe14bc4dfc8636dd8c7d0593035bde036d13a46;p=re2c Explicitly handle default state as special case during DFA construction. --- diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 61941864..0333e090 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -39,12 +39,11 @@ DFA::DFA const size_t nstates = dfa.states.size(); const size_t nchars = dfa.nchars; - State **i2s = new State*[nstates + 1]; + State **i2s = new State*[nstates]; for (size_t i = 0; i < nstates; ++i) { i2s[i] = dfa.states[i] ? new State : NULL; } - i2s[nstates] = NULL; State **p = &head; for (size_t i = 0; i < nstates; ++i) @@ -66,7 +65,7 @@ DFA::DFA { const size_t to = t->arcs[c]; for (;++c < nchars && t->arcs[c] == to;); - s->go.span[j].to = i2s[to]; + s->go.span[j].to = to == dfa_t::NIL ? 
NULL : i2s[to]; s->go.span[j].ub = charset[c]; } s->go.nSpans = j; diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 40cc1c31..de4d60e1 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -1,5 +1,6 @@ #include #include +#include <limits> #include #include #include @@ -14,6 +15,8 @@ namespace re2c { +const size_t dfa_t::NIL = std::numeric_limits<size_t>::max(); + /* * note [marking DFA states] * @@ -122,20 +125,19 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules) , nchars(charset.size() - 1) // (n + 1) bounds for n ranges { std::map > kernels; - nfa_state_t **work = new nfa_state_t* [nfa.size]; - std::vector *go = new std::vector[nchars]; + nfa_state_t **kernel = new nfa_state_t*[nfa.size]; + std::vector *arcs = new std::vector[nchars]; - findState(work, closure(work, nfa.root), states, kernels); - for (size_t k = 0; k < states.size(); ++k) + findState(kernel, closure(kernel, nfa.root), states, kernels); + for (size_t n = 0; n < states.size(); ++n) { - dfa_state_t *s = states[k]; + dfa_state_t *s = states[n]; for(size_t i = 0; i < nchars; ++i) { - go[i].clear(); + arcs[i].clear(); } - s->rule = NULL; for (size_t k = 0; k < s->kCount; ++k) { nfa_state_t *n = s->kernel[k]; @@ -150,7 +152,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules) for (; charset[j] != r->lower(); ++j); for (; charset[j] != r->upper(); ++j) { - go[j].push_back(n2); + arcs[j].push_back(n2); } } break; @@ -189,35 +191,23 @@ dfa_t::dfa_t(const nfa_t &nfa, const charset_t &charset, rules_t &rules) s->arcs = new size_t[nchars]; for(size_t i = 0; i < nchars; ++i) { - if(!go[i].empty()) + if(arcs[i].empty()) { - nfa_state_t **cP = work; - for (std::vector::const_iterator j = go[i].begin(); j != go[i].end(); ++j) - { - cP = closure(cP, *j); - } - s->arcs[i] = findState(work, cP, states, kernels); + s->arcs[i] = NIL; } else { - s->arcs[i] = ~0u; - } - } - } - delete [] work; - delete [] 
go; - - const size_t count = states.size(); - for (size_t i = 0; i < count; ++i) - { - for (size_t c = 0; c < nchars; ++c) - { - if (states[i]->arcs[c] == ~0u) - { - states[i]->arcs[c] = count; + nfa_state_t **end = kernel; + for (std::vector::const_iterator j = arcs[i].begin(); j != arcs[i].end(); ++j) + { + end = closure(end, *j); + } + s->arcs[i] = findState(kernel, end, states, kernels); } } } + delete[] kernel; + delete[] arcs; } dfa_t::~dfa_t() diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 5b72a139..0e53fe4d 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -46,6 +46,7 @@ struct dfa_t TABLE, MOORE }; + static const size_t NIL; std::vector states; const size_t nchars; diff --git a/re2c/src/ir/dfa/minimization.cc b/re2c/src/ir/dfa/minimization.cc index 851b3fac..8fdf1389 100644 --- a/re2c/src/ir/dfa/minimization.cc +++ b/re2c/src/ir/dfa/minimization.cc @@ -12,7 +12,7 @@ void dfa_t::minimization() { const size_t count = states.size(); - size_t *part = new size_t[count + 1]; + size_t *part = new size_t[count]; switch (opts->dfa_minimization) { @@ -27,7 +27,10 @@ void dfa_t::minimization() size_t *arcs = states[i]->arcs; for (size_t c = 0; c < nchars; ++c) { - arcs[c] = part[arcs[c]]; + if (arcs[c] != NIL) + { + arcs[c] = part[arcs[c]]; + } } } else @@ -58,9 +61,9 @@ void dfa_t::minimization_table(size_t *part) { const size_t count = states.size(); - bool **tbl = new bool*[count + 1]; - tbl[0] = new bool[count * (count + 1) / 2]; - for (size_t i = 0; i < count; ++i) + bool **tbl = new bool*[count]; + tbl[0] = new bool[count * (count - 1) / 2]; + for (size_t i = 0; i < count - 1; ++i) { tbl[i + 1] = tbl[i] + i; } @@ -74,7 +77,6 @@ void dfa_t::minimization_table(size_t *part) tbl[i][j] = s1->ctx != s2->ctx || s1->rule != s2->rule; } - tbl[count][i] = true; } for (bool loop = true; loop;) @@ -94,7 +96,7 @@ void dfa_t::minimization_table(size_t *part) { std::swap(oi, oj); } - if (oi != oj && tbl[oi][oj]) + if (oi != oj && (oi == NIL 
|| oj == NIL || tbl[oi][oj])) { tbl[i][j] = true; loop = true; @@ -106,7 +108,7 @@ void dfa_t::minimization_table(size_t *part) } } - for (size_t i = 0; i <= count; ++i) + for (size_t i = 0; i < count; ++i) { part[i] = i; for (size_t j = 0; j < i; ++j) @@ -148,7 +150,7 @@ void dfa_t::minimization_moore(size_t *part) if (init.insert(std::make_pair(key, i)).second) { part[i] = i; - next[i] = count; + next[i] = NIL; } else { @@ -158,7 +160,6 @@ void dfa_t::minimization_moore(size_t *part) next[j] = i; } } - part[count] = count; size_t *out = new size_t[nchars * count]; size_t *diff = new size_t[count]; @@ -167,23 +168,23 @@ void dfa_t::minimization_moore(size_t *part) loop = false; for (size_t i = 0; i < count; ++i) { - if (i != part[i] || next[i] == count) + if (i != part[i] || next[i] == NIL) { continue; } - for (size_t j = i; j != count; j = next[j]) + for (size_t j = i; j != NIL; j = next[j]) { size_t *o = &out[j * nchars]; size_t *a = states[j]->arcs; for (size_t c = 0; c < nchars; ++c) { - o[c] = part[a[c]]; + o[c] = a[c] == NIL ? 
NIL : part[a[c]]; } } size_t diff_count = 0; - for (size_t j = i; j != count;) + for (size_t j = i; j != NIL;) { const size_t j_next = next[j]; size_t n = 0; @@ -202,7 +203,7 @@ void dfa_t::minimization_moore(size_t *part) { diff[diff_count++] = j; part[j] = j; - next[j] = count; + next[j] = NIL; } j = j_next; } diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 0d9ca54f..deee1133 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -77,36 +77,38 @@ Skeleton::Skeleton , const std::string &dfa_cond , uint32_t dfa_line ) - // +1 for default DFA state (NULL) : name (dfa_name) , cond (dfa_cond) , line (dfa_line) - , nodes_count (dfa.states.size() + 1) // +1 for default state - , nodes (new Node [nodes_count]) + , nodes_count (dfa.states.size()) + , nodes (new Node [nodes_count + 1]) // +1 for default state , sizeof_key (4) , rules (rs) { const size_t nc = cs.size() - 1; // initialize skeleton nodes - for (size_t i = 0; i < nodes_count - 1; ++i) + Node *nil = &nodes[nodes_count]; + for (size_t i = 0; i < nodes_count; ++i) { dfa_state_t *s = dfa.states[i]; - std::vector > a; + std::vector > arcs; for (size_t c = 0; c < nc;) { const size_t j = s->arcs[c]; for (;++c < nc && s->arcs[c] == j;); - a.push_back(std::make_pair(j == ~0u ? &nodes[nodes_count - 1] : &nodes[j], cs[c])); + Node *to = j == dfa_t::NIL + ? nil + : &nodes[j]; + arcs.push_back(std::make_pair(to, cs[c])); } - if (a.size() == 1 && a[0].first == &nodes[nodes_count - 1]) + // all arcs go to default node => this node is final, drop arcs + if (arcs.size() == 1 && arcs[0].first == nil) { - a.clear(); + arcs.clear(); } - nodes[i].init(s->ctx, s->rule, a); + nodes[i].init(s->ctx, s->rule, arcs); } - // last node (the one corresponding to default state) - // needs not to be initialized after construction // calculate maximal path length, check overflow nodes->calc_dist ();