From: Ulya Trofimovich Date: Sun, 12 Aug 2018 19:38:11 +0000 (+0100) Subject: Gathered all determinization-related data in a struct to avoid passing many parameters. X-Git-Tag: 1.1~12 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=80eab695735db115f41efd7d646ac3b4d66314ba;p=re2c Gathered all determinization-related data in a struct to avoid passing many parameters. --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 378fb767..f1479dad 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -24,6 +24,7 @@ SRC_HDR = \ src/adfa/dump.h \ src/dfa/cfg/cfg.h \ src/dfa/closure.h \ + src/dfa/determinization.h \ src/dfa/dfa.h \ src/dfa/dump.h \ src/dfa/find_state.h \ diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index 13db4da8..5323bc84 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -10,6 +10,7 @@ #include "src/conf/opt.h" #include "src/dfa/closure.h" +#include "src/dfa/determinization.h" #include "src/dfa/dfa.h" #include "src/dfa/tagpool.h" #include "src/dfa/tcmd.h" @@ -76,42 +77,40 @@ namespace re2c */ -static void closure_posix(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool, const std::vector &tags, const prectable_t *prectbl, size_t noldclos); -static void closure_leftmost(const closure_t &init, closure_t &done, closure_t *shadow, Tagpool &tagpool); -static void prune(closure_t &clos, std::valarray &rules); -static void lower_lookahead_to_transition(closure_t &clos); -static tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers); -static void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags, - const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos); -static bool cmpby_rule_state(const clos_t &x, const clos_t &y); +static void closure_posix(determ_context_t &); +static nfa_state_t *relax(determ_context_t &, clos_t); +static nfa_state_t *explore(determ_context_t &, nfa_state_t *); +static void closure_leftmost(determ_context_t &); +static void prune(closure_t &, std::valarray &); +static void lower_lookahead_to_transition(closure_t &); +static void generate_versions(determ_context_t &); +static void orders(determ_context_t &); +static bool cmpby_rule_state(const clos_t &, const clos_t &); +static int32_t pack(int32_t, int32_t); -tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2, - Tagpool &tagpool, newvers_t &newvers, closure_t *shadow, - const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos) +void tagged_epsilon_closure(determ_context_t &ctx) { + closure_t &closure = ctx.dc_closure; + // build tagged epsilon-closure of the given set of NFA states - if (tagpool.opts->posix_captures) { - closure_posix(clos1, clos2, shadow, tagpool, dfa.tags, prectbl_old, noldclos); - prune(clos2, dfa.rules); - std::sort(clos2.begin(), clos2.end(), cmpby_rule_state); - orders(clos2, tagpool, dfa.tags, prectbl_old, prectbl_new, noldclos); + if (ctx.dc_opts->posix_captures) { + closure_posix(ctx); + prune(closure, ctx.dc_nfa.rules); + std::sort(closure.begin(), closure.end(), cmpby_rule_state); + orders(ctx); } else { - closure_leftmost(clos1, clos2, shadow, tagpool); - prune(clos2, dfa.rules); + closure_leftmost(ctx); + prune(closure, ctx.dc_nfa.rules); } // see note [the difference between TDFA(0) and TDFA(1)] - if (!tagpool.opts->lookahead) { - lower_lookahead_to_transition(clos2); - if (shadow) lower_lookahead_to_transition(*shadow); + if (!ctx.dc_opts->lookahead) { + lower_lookahead_to_transition(closure); } // merge tags from different rules, find nondeterministic tags - tcmd_t *cmd = generate_versions(dfa, clos2, tagpool, newvers); - if (shadow) generate_versions(dfa, *shadow, tagpool, newvers); - - return cmd; + generate_versions(ctx); } @@ -128,74 +127,73 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y) } -static nfa_state_t *relax(clos_t x, closure_t &done, - closure_t *shadow, Tagpool &tagpool, const std::vector &tags, - const prectable_t *prectbl, size_t noldclos) +nfa_state_t *relax(determ_context_t &ctx, clos_t x) { + closure_t &done = ctx.dc_closure; nfa_state_t *q = x.state; - uint32_t &i = q->clos; + const uint32_t idx = q->clos; + int32_t h1, h2; // first time we see this state - if (i == NOCLOS) { - i = static_cast(done.size()); + if (idx == NOCLOS) { + q->clos = static_cast(done.size()); done.push_back(x); } + // States of in-degree less than 2 are not joint points; // the fact that we are re-scanning this state means that we found // a better path to some previous state. Due to the right distributivity // of path comparison over path concatenation (X < Y => XZ < YZ) we // can just propagate the new path up to the next join point. else if (q->indeg < 2) { - std::swap(x, done[i]); - if (shadow) shadow->push_back(x); + done[idx] = x; } + // join point; compare the new path and the old path + else if (precedence(ctx, x, done[idx], h1, h2) < 0) { + done[idx] = x; + } + + // the previous path was better, discard the new one else { - clos_t &y = done[i]; - int h1, h2, l; - l = tagpool.history.precedence (x, y, h1, h2, prectbl, tags, noldclos); - if (l < 0) std::swap(x, y); - if (shadow && l != 0) shadow->push_back(x); - if (l >= 0) q = NULL; + q = NULL; } return q; } -static nfa_state_t *explore(nfa_state_t *q, closure_t &done, - closure_t *shadow, Tagpool &tagpool, const std::vector &tags, - const prectable_t *prectbl, size_t noldclos) +nfa_state_t *explore(determ_context_t &ctx, nfa_state_t *q) { // find the next admissible transition, adjust the index // of the next transition and return the to-state nfa_state_t *p = NULL; - clos_t x = done[q->clos]; + clos_t x = ctx.dc_closure[q->clos]; switch (q->type) { case nfa_state_t::NIL: if (q->arcidx == 0) { x.state = q->nil.out; - p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos); + p = relax(ctx, x); ++q->arcidx; } break; case nfa_state_t::ALT: if (q->arcidx == 0) { x.state = q->alt.out1; - p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos); + p = relax(ctx, x); ++q->arcidx; } if (q->arcidx == 1 && !p) { x.state = q->alt.out2; - p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos); + p = relax(ctx, x); ++q->arcidx; } break; case nfa_state_t::TAG: if (q->arcidx == 0) { x.state = q->tag.out; - x.tlook = tagpool.history.push(x.tlook, q->tag.info); - p = relax(x, done, shadow, tagpool, tags, prectbl, noldclos); + x.tlook = ctx.dc_tagtrie.push(x.tlook, q->tag.info); + p = relax(ctx, x); ++q->arcidx; } break; @@ -207,21 +205,20 @@ static nfa_state_t *explore(nfa_state_t *q, closure_t &done, } -void closure_posix(const closure_t &init, closure_t &done, - closure_t *shadow, Tagpool &tagpool, const std::vector &tags, - const prectable_t *prectbl, size_t noldclos) +void closure_posix(determ_context_t &ctx) { + const closure_t &init = ctx.dc_reached; + closure_t &done = ctx.dc_closure; std::stack - &topsort = tagpool.astack, - &linear = tagpool.bstack; + &topsort = ctx.dc_tagpool.astack, + &linear = ctx.dc_tagpool.bstack; nfa_state_t *q, *p; done.clear(); - if (shadow) shadow->clear(); // enqueue all initial states (there might be duplicates) for (cclositer_t c = init.begin(); c != init.end(); ++c) { - q = relax(*c, done, shadow, tagpool, tags, prectbl, noldclos); + q = relax(ctx, *c); if (q) { topsort.push(q); q->status = GOR_TOPSORT; @@ -246,7 +243,7 @@ void closure_posix(const closure_t &init, closure_t &done, q->status = GOR_TOPSORT; // find next admissible transition - while ((p = explore(q, done, shadow, tagpool, tags, prectbl, noldclos)) + while ((p = explore(ctx, q)) && p->status != GOR_NOPASS) { p->active = 1; } @@ -274,7 +271,7 @@ void closure_posix(const closure_t &init, closure_t &done, if (q->active) { // scan admissible transitions q->arcidx = 0; - while ((p = explore(q, done, shadow, tagpool, tags, prectbl, noldclos))) { + while ((p = explore(ctx, q))) { if (p->status == GOR_NOPASS) { topsort.push(p); p->arcidx = 0; @@ -287,7 +284,6 @@ void closure_posix(const closure_t &init, closure_t &done, q->status = GOR_NOPASS; q->active = 0; - q->arcidx = 0; } } @@ -295,19 +291,20 @@ void closure_posix(const closure_t &init, closure_t &done, for (clositer_t i = done.begin(); i != done.end(); ++i) { q = i->state; q->clos = NOCLOS; - assert(q->status == GOR_NOPASS && q->active == 0 && q->arcidx == 0); + q->arcidx = 0; + assert(q->status == GOR_NOPASS && q->active == 0); } } -void closure_leftmost(const closure_t &init, closure_t &done, - closure_t *shadow, Tagpool &tagpool) +void closure_leftmost(determ_context_t &ctx) { - std::stack &todo = tagpool.cstack; + const closure_t &init = ctx.dc_reached; + closure_t &done = ctx.dc_closure; + std::stack &todo = ctx.dc_tagpool.cstack; // enqueue all initial states done.clear(); - if (shadow) shadow->clear(); for (rcclositer_t c = init.rbegin(); c != init.rend(); ++c) { todo.push(*c); } @@ -321,30 +318,27 @@ void closure_leftmost(const closure_t &init, closure_t &done, if (n->clos == NOCLOS) { n->clos = static_cast(done.size()); done.push_back(x); - } else { - if (shadow) shadow->push_back(x); - continue; - } - switch (n->type) { - case nfa_state_t::NIL: - x.state = n->nil.out; - todo.push(x); - break; - case nfa_state_t::ALT: - x.state = n->alt.out2; - todo.push(x); - x.state = n->alt.out1; - todo.push(x); - break; - case nfa_state_t::TAG: - x.state = n->tag.out; - x.tlook = tagpool.history.push(x.tlook, n->tag.info); - todo.push(x); - break; - case nfa_state_t::RAN: - case nfa_state_t::FIN: - break; + switch (n->type) { + case nfa_state_t::NIL: + x.state = n->nil.out; + todo.push(x); + break; + case nfa_state_t::ALT: + x.state = n->alt.out2; + todo.push(x); + x.state = n->alt.out1; + todo.push(x); + break; + case nfa_state_t::TAG: + x.state = n->tag.out; + x.tlook = ctx.dc_tagtrie.push(x.tlook, n->tag.info); + todo.push(x); + break; + case nfa_state_t::RAN: + case nfa_state_t::FIN: + break; + } } } @@ -356,9 +350,9 @@ void closure_leftmost(const closure_t &init, closure_t &done, } -void prune(closure_t &clos, std::valarray &rules) +void prune(closure_t &closure, std::valarray &rules) { - clositer_t b = clos.begin(), e = clos.end(), i, j; + clositer_t b = closure.begin(), e = closure.end(), i, j; // drop "inner" states (non-final without outgoing non-epsilon transitions) j = std::stable_partition(b, e, clos_t::ran); @@ -376,29 +370,35 @@ void prune(closure_t &clos, std::valarray &rules) n = static_cast(j - b) + 1; } - clos.resize(n); + closure.resize(n); } -void lower_lookahead_to_transition(closure_t &clos) +void lower_lookahead_to_transition(closure_t &closure) { - for (clositer_t c = clos.begin(); c != clos.end(); ++c) { + for (clositer_t c = closure.begin(); c != closure.end(); ++c) { c->ttran = c->tlook; c->tlook = HROOT; } } -tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers_t &newvers) +void generate_versions(determ_context_t &ctx) { - tcmd_t *cmd = NULL; - const size_t ntag = tagpool.ntags; - tagver_t *vers = tagpool.buffer, &maxver = dfa.maxtagver; - tagtree_t &tagtree = tagpool.history; + dfa_t &dfa = ctx.dc_dfa; const std::vector &tags = dfa.tags; + const size_t ntag = tags.size(); + tagver_t &maxver = dfa.maxtagver; + Tagpool &tagpool = ctx.dc_tagpool; + tagver_t *vers = tagpool.buffer; + closure_t &clos = ctx.dc_closure; + tagtree_t &tagtree = ctx.dc_tagtrie; + newvers_t &newvers = ctx.dc_newvers; + clositer_t b = clos.begin(), e = clos.end(), c; - newver_cmp_t cmp = {tagtree}; + newver_cmp_t cmp(tagtree); newvers_t newacts(cmp); + tcmd_t *cmd = NULL; // for each tag, if there is at least one tagged transition, // allocate new version (negative for bottom and positive for @@ -468,32 +468,35 @@ tcmd_t *generate_versions(dfa_t &dfa, closure_t &clos, Tagpool &tagpool, newvers c->tvers = tagpool.insert(vers); } - return cmd; + ctx.dc_actions = cmd; } -static inline int32_t pack(int32_t longest, int32_t leftmost) +int32_t pack(int32_t longest, int32_t leftmost) { // leftmost: higher 2 bits, longest: lower 30 bits return longest | (leftmost << 30); } -void orders(closure_t &clos, Tagpool &tagpool, const std::vector &tags, - const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos) +void orders(determ_context_t &ctx) { - const size_t nclos = clos.size(); - prectbl_new = tagpool.alc.alloct(nclos * nclos); + closure_t &closure = ctx.dc_closure; + const size_t nclos = closure.size(); + + prectable_t *prectbl = ctx.dc_allocator.alloct(nclos * nclos); for (size_t i = 0; i < nclos; ++i) { for (size_t j = i + 1; j < nclos; ++j) { int32_t rho1, rho2, l; - l = tagpool.history.precedence (clos[i], clos[j], rho1, rho2, prectbl_old, tags, noldclos); - prectbl_new[i * nclos + j] = pack(rho1, l); - prectbl_new[j * nclos + i] = pack(rho2, -l); + l = precedence (ctx, closure[i], closure[j], rho1, rho2); + prectbl[i * nclos + j] = pack(rho1, l); + prectbl[j * nclos + i] = pack(rho2, -l); } - prectbl_new[i * nclos + i] = 0; + prectbl[i * nclos + i] = 0; } + + ctx.dc_prectbl = prectbl; } } // namespace re2c diff --git a/re2c/src/dfa/closure.h b/re2c/src/dfa/closure.h index 1313a89f..4e75e12c 100644 --- a/re2c/src/dfa/closure.h +++ b/re2c/src/dfa/closure.h @@ -23,10 +23,10 @@ typedef slab_allocator_t<> allocator_t; struct clos_t { nfa_state_t *state; + uint32_t origin; uint32_t tvers; // vector of tag versions (including lookahead tags) hidx_t ttran; // history of transition tags hidx_t tlook; // history of lookahead tags - uint32_t origin; static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; } static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; } @@ -48,6 +48,9 @@ struct newver_t struct newver_cmp_t { tagtree_t &history; + + explicit newver_cmp_t(tagtree_t &h) : history(h) {} + bool operator()(const newver_t &x, const newver_t &y) const { if (x.tag < y.tag) return true; @@ -62,10 +65,6 @@ struct newver_cmp_t typedef std::map newvers_t; -tcmd_t *closure(dfa_t &dfa, closure_t &clos1, closure_t &clos2, - Tagpool &tagpool, newvers_t &newvers, closure_t *shadow, - const prectable_t *prectbl_old, prectable_t *&prectbl_new, size_t noldclos); - } // namespace re2c #endif // _RE2C_DFA_CLOSURE_ diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 78211fa2..e7072b91 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -12,6 +12,7 @@ #include "src/conf/warn.h" #include "src/dfa/closure.h" #include "src/dfa/dfa.h" +#include "src/dfa/determinization.h" #include "src/dfa/dump.h" #include "src/dfa/find_state.h" #include "src/dfa/tagpool.h" @@ -25,14 +26,12 @@ namespace re2c { -static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol); -static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol); -static void warn_nondeterministic_tags(const kernels_t &kernels, - const Tagpool &tagpool, const std::vector &tags, - const std::valarray &rules, const std::string &cond, Warn &warn); +static nfa_state_t *transition(nfa_state_t *, uint32_t); +static void reach_on_symbol(determ_context_t &); +static void warn_nondeterministic_tags(const determ_context_t &); -const size_t dfa_t::NIL = std::numeric_limits::max(); +const uint32_t dfa_t::NIL = ~0u; nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) @@ -49,21 +48,59 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) } -void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol) +void reach_on_symbol(determ_context_t &ctx) { - clos.clear(); + const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin]; + closure_t &reached = ctx.dc_reached; + const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol]; + + reached.clear(); for (uint32_t i = 0; i < kernel->size; ++i) { nfa_state_t *s = transition(kernel->state[i], symbol); if (s) { - clos_t c = {s, kernel->tvers[i], kernel->tlook[i], HROOT, i}; - clos.push_back(c); + clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT}; + reached.push_back(c); + } + } +} + + +static uint32_t init_tag_versions(determ_context_t &ctx) +{ + dfa_t &dfa = ctx.dc_dfa; + const size_t ntags = dfa.tags.size(); + + // all-zero tag configuration must have static number zero + assert(ZERO_TAGS == ctx.dc_tagpool.insert_const(TAGVER_ZERO)); + + // initial tag versions: [1 .. N] + const uint32_t INITIAL_TAGS = ctx.dc_tagpool.insert_succ(1); + + // other versions: [ .. -(N + 1)] and [N + 1 .. ] + dfa.maxtagver = static_cast(ntags); + + // final/fallback versions will be assigned on the go + dfa.finvers = new tagver_t[ntags]; + for (size_t i = 0; i < ntags; ++i) { + dfa.finvers[i] = fixed(dfa.tags[i]) ? TAGVER_ZERO : ++dfa.maxtagver; + } + + // mark tags with history (initial and final) + for (size_t i = 0; i < ntags; ++i) { + if (history(dfa.tags[i])) { + tagver_t v = static_cast(i) + 1, f = dfa.finvers[i]; + if (f != TAGVER_ZERO) { + dfa.mtagvers.insert(f); + } + dfa.mtagvers.insert(v); } } + + return INITIAL_TAGS; } -dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, - const std::string &cond, Warn &warn) +dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn &warn) : states() , nchars(nfa.charset.size() - 1) // (n + 1) bounds for n ranges , charset(nfa.charset) @@ -76,60 +113,34 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, , tcmd0(NULL) , tcid0(TCID0) { - const size_t ntag = tags.size(); - Tagpool tagpool(opts, ntag); - kernels_t kernels(tagpool, tcpool, tags); - closure_t clos1, clos2; - newver_cmp_t newvers_cmp = {tagpool.history}; - newvers_t newvers(newvers_cmp); - tcmd_t *acts; - dump_dfa_t dump(*this, tagpool, nfa); - prectable_t *prectbl = NULL; + determ_context_t ctx(opts, warn, cond, nfa, *this); - // all-zero tag configuration must have static number zero - assert(ZERO_TAGS == tagpool.insert_const(TAGVER_ZERO)); - // initial tag versions: [1 .. N] - const size_t INITIAL_TAGS = tagpool.insert_succ(1); - // other versions: [ .. -(N + 1)] and [N + 1 .. ] - maxtagver = static_cast(ntag); + const uint32_t INITIAL_TAGS = init_tag_versions(ctx); - // final/fallback versions will be assigned on the go - finvers = new tagver_t[ntag]; - for (size_t i = 0; i < ntag; ++i) { - finvers[i] = fixed(tags[i]) ? TAGVER_ZERO : ++maxtagver; - } - - // mark tags with history (initial and final) - for (size_t i = 0; i < ntag; ++i) { - if (history(tags[i])) { - tagver_t v = static_cast(i) + 1, f = finvers[i]; - if (f != TAGVER_ZERO) mtagvers.insert(f); - mtagvers.insert(v); - } - } + // initial state + const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; + ctx.dc_reached.push_back(c0); + tagged_epsilon_closure(ctx); + find_state(ctx); // iterate while new kernels are added: for each alphabet symbol, // build tagged epsilon-closure of all reachable NFA states, // then find identical or mappable DFA state or add a new one + for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { + + ctx.dc_origin = i; + ctx.dc_newvers.clear(); - clos_t c0 = {nfa.root, INITIAL_TAGS, HROOT, HROOT, 0}; - clos1.push_back(c0); - acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, NULL, prectbl, 0); - find_state(*this, dfa_t::NIL, 0/* any */, kernels, clos2, acts, dump, prectbl); - - for (size_t i = 0; i < kernels.size(); ++i) { - newvers.clear(); - for (size_t c = 0; c < nchars; ++c) { - const kernel_t *kernel = kernels[i]; - reach(kernel, clos1, charset[c]); - acts = closure(*this, clos1, clos2, tagpool, newvers, dump.shadow, kernel->prectbl, prectbl, kernel->size); - find_state(*this, i, c, kernels, clos2, acts, dump, prectbl); + for (uint32_t c = 0; c < nchars; ++c) { + ctx.dc_symbol = c; + + reach_on_symbol(ctx); + tagged_epsilon_closure(ctx); + find_state(ctx); } } - if (!opts->posix_captures) { - warn_nondeterministic_tags(kernels, tagpool, tags, rules, cond, warn); - } + warn_nondeterministic_tags(ctx); } @@ -137,17 +148,23 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, // used in each kernel (degree of non-determinism) and warn about tags with // maximum degree two or more. // WARNING: this function assumes that kernel items are grouped by rule -void warn_nondeterministic_tags(const kernels_t &kernels, - const Tagpool &tagpool, const std::vector &tags, - const std::valarray &rules, const std::string &cond, Warn &warn) +void warn_nondeterministic_tags(const determ_context_t &ctx) { + if (ctx.dc_opts->posix_captures) return; + + Warn &warn = ctx.dc_warn; + const kernels_t &kernels = ctx.dc_kernels; + const std::vector &tags = ctx.dc_dfa.tags; + const std::valarray &rules = ctx.dc_dfa.rules; + const size_t - ntag = tagpool.ntags, - nkrn = kernels.size(); + ntag = tags.size(), + nkrn = kernels.size(), + nrule = rules.size(); std::vector maxv(ntag, 0); std::set uniq; - for (size_t i = 0; i < nkrn; ++i) { + for (uint32_t i = 0; i < nkrn; ++i) { const kernel_t *k = kernels[i]; nfa_state_t **s = k->state; const size_t n = k->size; @@ -162,27 +179,50 @@ void warn_nondeterministic_tags(const kernels_t &kernels, for (size_t t = rule.ltag; t < rule.htag; ++t) { uniq.clear(); for (size_t m = l; m < u; ++m) { - uniq.insert(tagpool[v[m]][t]); + uniq.insert(ctx.dc_tagpool[v[m]][t]); } maxv[t] = std::max(maxv[t], uniq.size()); } } } - const size_t nrule = rules.size(); - for (size_t r = 0; r < nrule; ++r) { + for (uint32_t r = 0; r < nrule; ++r) { const Rule &rule = rules[r]; for (size_t t = rule.ltag; t < rule.htag; ++t) { const size_t m = maxv[t]; if (m > 1) { const uint32_t line = rule.code->fline; - warn.nondeterministic_tags(line, cond, tags[t].name, m); + warn.nondeterministic_tags(line, ctx.dc_condname, tags[t].name, m); } } } } +determ_context_t::determ_context_t(const opt_t *opts, Warn &warn + , const std::string &condname, const nfa_t &nfa, dfa_t &dfa) + : dc_opts(opts) + , dc_warn(warn) + , dc_condname(condname) + , dc_nfa(nfa) + , dc_dfa(dfa) + , dc_origin(dfa_t::NIL) + , dc_target(dfa_t::NIL) + , dc_symbol(0) + , dc_actions(NULL) + , dc_reached() + , dc_closure() + , dc_prectbl(NULL) + , dc_tagpool(opts, nfa.tags.size()) + , dc_allocator(dc_tagpool.alc) + , dc_tagtrie(dc_tagpool.history) + , dc_kernels() + , dc_buffers(dc_allocator) + , dc_newvers(newver_cmp_t(dc_tagtrie)) + , dc_dump(opts) +{} + + dfa_t::~dfa_t() { std::vector::iterator diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h new file mode 100644 index 00000000..4ac2a502 --- /dev/null +++ b/re2c/src/dfa/determinization.h @@ -0,0 +1,64 @@ +#ifndef _RE2C_DFA_DETERMINIZATION_ +#define _RE2C_DFA_DETERMINIZATION_ + +#include "src/util/c99_stdint.h" +#include + +#include "src/dfa/closure.h" +#include "src/dfa/dump.h" +#include "src/dfa/tagpool.h" +#include "src/dfa/tagtree.h" +#include "src/dfa/find_state.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +// fwd +struct opt_t; +struct Warn; +struct nfa_t; +struct dfa_t; + +struct determ_context_t +{ + // determinization input + const opt_t *dc_opts; // options + Warn &dc_warn; // warnings + const std::string &dc_condname; // the name of current condition (with -c) + const nfa_t &dc_nfa; // TNFA + + // determinization output + dfa_t &dc_dfa; // resulting TDFA +// tcpool_t &dc_tcmdpool; // pool of tag actions +// uint32_t dc_maxtagver; // maximal tag version +// tagver_t *dc_finvers; // tag versions used in final states +// std::set &dc_mtagvers; // the set of m-tags + + // temporary structures used by determinization + uint32_t dc_origin; // from-state of the current transition + uint32_t dc_target; // to-state of the current transition + uint32_t dc_symbol; // alphabet symbol of the current transition + tcmd_t *dc_actions; // tag actions of the current transition + closure_t dc_reached; + closure_t dc_closure; + prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation + Tagpool dc_tagpool; + allocator_t &dc_allocator; + tagtree_t &dc_tagtrie; // prefix trie of tag histories + kernels_t dc_kernels; // TDFA states under construction + kernel_buffers_t dc_buffers; + newvers_t dc_newvers; + dump_dfa_t dc_dump; + + determ_context_t(const opt_t *, Warn &, const std::string &, const nfa_t &, dfa_t &); + FORBID_COPY(determ_context_t); +}; + +void tagged_epsilon_closure(determ_context_t &ctx); +void find_state(determ_context_t &ctx); +int32_t precedence(determ_context_t &, const clos_t &, const clos_t &, int32_t &, int32_t &); + +} // namespace re2c + +#endif // _RE2C_DFA_DETERMINIZATION_ diff --git a/re2c/src/dfa/dfa.h b/re2c/src/dfa/dfa.h index 2135623f..a6a17ba1 100644 --- a/re2c/src/dfa/dfa.h +++ b/re2c/src/dfa/dfa.h @@ -47,7 +47,7 @@ struct dfa_state_t struct dfa_t { - static const size_t NIL; + static const uint32_t NIL; std::vector states; const size_t nchars; diff --git a/re2c/src/dfa/dump.cc b/re2c/src/dfa/dump.cc index cf74a927..bf208d10 100644 --- a/re2c/src/dfa/dump.cc +++ b/re2c/src/dfa/dump.cc @@ -6,6 +6,7 @@ #include "src/conf/opt.h" #include "src/dfa/dfa.h" +#include "src/dfa/determinization.h" #include "src/dfa/dump.h" #include "src/dfa/find_state.h" #include "src/dfa/tagpool.h" @@ -20,24 +21,19 @@ namespace re2c static void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sym, const tcpool_t &tcpool); static const char *tagname(const Tag &t); -static void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers); +static void dump_tags(const Tagpool &tagpool, hidx_t ttran, uint32_t tvers); -dump_dfa_t::dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n) - : debug(pool.opts->dump_dfa_raw) - , dfa(d) - , tagpool(pool) +dump_dfa_t::dump_dfa_t(const opt_t *opts) + : debug(opts->dump_dfa_raw) , uniqidx(0) - , base(n.states) - , shadow(NULL) { if (!debug) return; - shadow = new closure_t; fprintf(stderr, "digraph DFA {\n" " rankdir=LR\n" - " node[shape=plaintext fontname=fixed]\n" - " edge[arrowhead=vee fontname=fixed]\n\n"); + " node[shape=plaintext fontname=Courier]\n" + " edge[arrowhead=vee fontname=Courier]\n\n"); } @@ -45,17 +41,10 @@ dump_dfa_t::~dump_dfa_t() { if (!debug) return; - delete shadow; fprintf(stderr, "}\n"); } -uint32_t dump_dfa_t::index(const nfa_state_t *s) const -{ - return static_cast(s - base); -} - - static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i) { if (i == HROOT) { @@ -77,146 +66,107 @@ static void dump_history(const dfa_t &dfa, const tagtree_t &h, hidx_t i) } -void dump_dfa_t::closure_tags(cclositer_t c) +void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) { if (!debug) return; - if (c->tvers == ZERO_TAGS) return; - - const hidx_t l = c->tlook; - const tagver_t *vers = tagpool[c->tvers]; - const size_t ntag = tagpool.ntags; - - for (size_t t = 0; t < ntag; ++t) { - fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t])); - } - - if (l != HROOT) { - dump_history(dfa, tagpool.history, l); - } -} + const closure_t &closure = ctx.dc_closure; + cclositer_t b = closure.begin(), e = closure.end(), c; + const uint32_t origin = ctx.dc_origin; + const uint32_t target = ctx.dc_target; + const uint32_t symbol = ctx.dc_symbol; + const dfa_t &dfa = ctx.dc_dfa; + const Tagpool &tagpool = ctx.dc_tagpool; + uint32_t i; -void dump_dfa_t::closure(const closure_t &clos, uint32_t state, bool isnew) -{ - if (!debug) return; + if (target == dfa_t::NIL) return; - cclositer_t c1 = clos.begin(), c2 = clos.end(), c, - s1 = shadow->begin(), s2 = shadow->end(), s; - const char - *style = isnew ? "" : " STYLE=\"dotted\"", - *color = " COLOR=\"lightgray\""; - uint32_t i; + const uint32_t state = isnew ? target : ++uniqidx; + const char *prefix = isnew ? "" : "i"; + const char *style = isnew ? "" : " STYLE=\"dotted\""; + // closure fprintf(stderr, " %s%u [label=<", isnew ? "" : "i", state); - + ">", prefix, state); i = 0; - for (s = s1; s != s2; ++s, ++i) { - fprintf(stderr, "%u", - i, i, color, style, color, index(s->state)); - closure_tags(s); - fprintf(stderr, ""); - } - if (!shadow->empty()) { - fprintf(stderr, ""); - } - i = 0; - for (c = c1; c != c2; ++c, ++i) { + for (c = b; c != e; ++c, ++i) { fprintf(stderr, "%u", - i, style, index(c->state)); - closure_tags(c); + i, style, static_cast(c->state - ctx.dc_nfa.states)); + + if (c->tvers != ZERO_TAGS) { + const tagver_t *vers = tagpool[c->tvers]; + const size_t ntag = dfa.tags.size(); + + for (size_t t = 0; t < ntag; ++t) { + fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t])); + } + + if (c->tlook != HROOT) { + dump_history(dfa, ctx.dc_tagtrie, c->tlook); + } + } + fprintf(stderr, ""); } fprintf(stderr, ">]\n"); -} + // transitions (initial state) + if (origin == dfa_t::NIL) { + fprintf(stderr, " void [shape=point]\n"); -void dump_dfa_t::state0(const closure_t &clos) -{ - if (!debug) return; - - uint32_t i; - closure(clos, 0, true); - fprintf(stderr, " void [shape=point]\n"); - i = 0; - for (cclositer_t c = shadow->begin(); c != shadow->end(); ++c, ++i) { - fprintf(stderr, " void -> 0:_%u_%u:w [style=dotted color=lightgray fontcolor=lightgray label=\"", - i, i); - dump_tags(tagpool, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); - } - i = 0; - for (cclositer_t c = clos.begin(); c != clos.end(); ++c, ++i) { - fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i); - dump_tags(tagpool, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); + uint32_t i = 0; + for (c = b; c != e; ++c, ++i) { + fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i); + dump_tags(tagpool, c->ttran, c->tvers); + fprintf(stderr, "\"]\n"); + } } -} + // transitions (other states) + else { + if (!isnew) { + fprintf(stderr, + " i%u [style=dotted]\n" + " i%u:s -> %u:s [style=dotted label=\"", + state, state, origin); + dump_tcmd(dfa.states[origin]->tcmd[symbol]); + fprintf(stderr, "\"]\n"); + } -void dump_dfa_t::state(const closure_t &clos, size_t state, size_t symbol, bool isnew) -{ - if (!debug) return; + uint32_t i = 0; + for (c = b; c != e; ++c, ++i) { + fprintf(stderr, + " %u:%u:e -> %s%u:%u:w [label=\"%u", + origin, c->origin, prefix, state, i, symbol); + dump_tags(tagpool, c->ttran, c->tvers); + fprintf(stderr, "\"]\n"); + } + } - const dfa_state_t *s = dfa.states[state]; - const size_t state2 = s->arcs[symbol]; + // if final state, dump finalizer + const dfa_state_t *t = dfa.states[target]; + if (t->rule != Rule::NONE) { + const Rule &r = dfa.rules[t->rule]; + const tcmd_t *cmd = t->tcmd[dfa.nchars]; - if (state2 == dfa_t::NIL) return; + // see note [at most one final item per closure] + c = std::find_if(b, e, clos_t::fin); + assert(c != e); - const tcmd_t *cmd = s->tcmd[symbol]; - const uint32_t - a = static_cast(symbol), - x = static_cast(state), - y = static_cast(state2), - z = isnew ? y : ++uniqidx; - const char *prefix = isnew ? "" : "i"; - uint32_t i; + fprintf(stderr, " r%u [shape=none label=\"(", state); + for (size_t t = r.ltag; t < r.htag; ++t) { + if (t > r.ltag) fprintf(stderr, " "); + fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t])); + } + fprintf(stderr, ")\"]\n"); - closure(clos, z, isnew); - if (!isnew) { - fprintf(stderr, " i%u [style=dotted]\n" - " i%u:s -> %u:s [style=dotted label=\"", z, z, y); + fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"", + state, c->origin, state); dump_tcmd(cmd); fprintf(stderr, "\"]\n"); } - i = 0; - for (cclositer_t b = shadow->begin(), c = b; c != shadow->end(); ++c, ++i) { - fprintf(stderr, " %u:%u:e -> %s%u:_%u_%u:w [color=lightgray fontcolor=lightgray label=\"%u", - x, c->origin, prefix, z, i, i, a); - dump_tags(tagpool, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); - } - i = 0; - for (cclositer_t c = clos.begin(); c != clos.end(); ++c, ++i) { - fprintf(stderr, " %u:%u:e -> %s%u:%u:w [label=\"%u", - x, c->origin, prefix, z, i, a); - dump_tags(tagpool, c->ttran, c->tvers); - fprintf(stderr, "\"]\n"); - } -} - - -void dump_dfa_t::final(size_t state, const nfa_state_t *port) -{ - if (!debug) return; - - const dfa_state_t *s = dfa.states[state]; - const Rule &r = dfa.rules[s->rule]; - const tcmd_t *cmd = s->tcmd[dfa.nchars]; - const uint32_t x = static_cast(state); - - fprintf(stderr, " r%u [shape=none label=\"(", x); - for (size_t t = r.ltag; t < r.htag; ++t) { - if (t > r.ltag) fprintf(stderr, " "); - fprintf(stderr, "%s%d", tagname(dfa.tags[t]), abs(dfa.finvers[t])); - } - fprintf(stderr, ")\"]\n"); - - fprintf(stderr, " %u:%u:e -> r%u [style=dotted label=\"", x, index(port), x); - dump_tcmd(cmd); - fprintf(stderr, "\"]\n"); } @@ -229,8 +179,8 @@ void dump_dfa(const dfa_t &dfa) fprintf(stderr, "digraph DFA {\n" " rankdir=LR\n" - " node[shape=Mrecord fontname=fixed]\n" - " edge[arrowhead=vee fontname=fixed]\n\n"); + " node[shape=Mrecord fontname=Courier]\n" + " edge[arrowhead=vee fontname=Courier]\n\n"); // initializer fprintf(stderr, @@ -321,7 +271,7 @@ const char *tagname(const Tag &t) } -void dump_tags(const Tagpool &tagpool, hidx_t ttran, size_t tvers) +void dump_tags(const Tagpool &tagpool, hidx_t ttran, uint32_t tvers) { if (ttran == HROOT) return; diff --git a/re2c/src/dfa/dump.h b/re2c/src/dfa/dump.h index 6a7980a4..52a41d9e 100644 --- a/re2c/src/dfa/dump.h +++ b/re2c/src/dfa/dump.h @@ -4,38 +4,22 @@ #include #include "src/util/c99_stdint.h" -#include "src/dfa/closure.h" -#include "src/dfa/dfa.h" -#include "src/util/forbid_copy.h" - namespace re2c { -struct Tagpool; +struct determ_context_t; struct dfa_t; -struct nfa_state_t; -struct nfa_t; +struct opt_t; struct tcmd_t; -struct kernels_t; struct dump_dfa_t { const bool debug; - const dfa_t &dfa; - const Tagpool &tagpool; uint32_t uniqidx; - const nfa_state_t *base; - closure_t *shadow; - dump_dfa_t(const dfa_t &d, const Tagpool &pool, const nfa_t &n); + explicit dump_dfa_t(const opt_t *); ~dump_dfa_t(); - void closure_tags(cclositer_t c); - void closure(const closure_t &clos, uint32_t state, bool isnew); - void state0(const closure_t &clos); - void state(const closure_t &clos, size_t state, size_t symbol, bool isnew); - void final(size_t state, const nfa_state_t *port); - uint32_t index(const nfa_state_t *s) const; - FORBID_COPY(dump_dfa_t); + void state(const determ_context_t &, bool); }; void dump_dfa(const dfa_t &dfa); diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index b60062cd..7e54476d 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -4,6 +4,7 @@ #include #include +#include "src/dfa/determinization.h" #include "src/dfa/dfa.h" #include "src/dfa/dump.h" #include "src/dfa/find_state.h" @@ -79,18 +80,26 @@ namespace re2c struct kernel_eq_t { - Tagpool &tagpool; - const std::vector &tags; - + const determ_context_t &ctx; bool operator()(const kernel_t *, const kernel_t *) const; }; -static void reserve_buffers(kernel_buffers_t &, allocator_t &, tagver_t, size_t); +struct kernel_map_t +{ + determ_context_t &ctx; + bool operator()(const kernel_t *, const kernel_t *); +}; + + static kernel_t *make_new_kernel(size_t, allocator_t &); static kernel_t *make_kernel_copy(const kernel_t *, allocator_t &); -static uint32_t hash_kernel(const kernel_t *kernel); static void copy_to_buffer_kernel(const closure_t &, const prectable_t *, kernel_t *); +static void reserve_buffers(determ_context_t &); +static uint32_t hash_kernel(const kernel_t *kernel); +static bool equal_lookahead_tags(const kernel_t *, const kernel_t *, const determ_context_t &); +static bool do_find_state(determ_context_t &ctx); +static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin); kernel_buffers_t::kernel_buffers_t(allocator_t &alc) @@ -107,16 +116,6 @@ kernel_buffers_t::kernel_buffers_t(allocator_t &alc) {} -kernels_t::kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector &ts) - : lookup() - , tagpool(tagp) - , tcpool(tcp) - , tags(ts) - , buffers(tagp.alc) - , pacts(NULL) -{} - - kernel_t *make_new_kernel(size_t size, allocator_t &alc) { kernel_t *k = alc.alloct(1); @@ -150,11 +149,15 @@ kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc) } -void reserve_buffers(kernel_buffers_t &kbufs, allocator_t &alc, - tagver_t maxver, size_t maxkern) +void reserve_buffers(determ_context_t &ctx) { - if (kbufs.maxsize < maxkern) { - kbufs.maxsize = maxkern * 2; // in advance + kernel_buffers_t &kbufs = ctx.dc_buffers; + allocator_t &alc = ctx.dc_allocator; + const tagver_t maxver = ctx.dc_dfa.maxtagver; + const size_t nkern = ctx.dc_closure.size(); + + if (kbufs.maxsize < nkern) { + kbufs.maxsize = nkern * 2; // in advance kbufs.kernel = make_new_kernel(kbufs.maxsize, alc); } @@ -225,25 +228,31 @@ void copy_to_buffer_kernel(const closure_t &closure, } -static bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, - Tagpool &tagpool, const std::vector &tags) +bool equal_lookahead_tags(const kernel_t *x, const kernel_t *y, const determ_context_t &ctx) { + assert(x->size == y->size); + if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) { return true; } - tagtree_t &h = tagpool.history; + + tagtree_t &trie = ctx.dc_tagtrie; + const Tagpool &tagpool = ctx.dc_tagpool; + const std::vector &tags = ctx.dc_dfa.tags; + for (size_t i = 0; i < x->size; ++i) { const hidx_t xl = x->tlook[i], yl = y->tlook[i]; for (size_t t = 0; t < tagpool.ntags; ++t) { if (history(tags[t])) { // compare full tag sequences - if (h.compare_reversed(xl, yl, t) != 0) return false; + if (trie.compare_reversed(xl, yl, t) != 0) return false; } else { // compare only the last pair of tags - if (h.last(xl, t) != h.last(yl, t)) return false; + if (trie.last(xl, t) != trie.last(yl, t)) return false; } } } + return true; } @@ -257,11 +266,11 @@ bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const && memcmp(x->state, y->state, n * sizeof(void*)) == 0 && memcmp(x->tvers, y->tvers, n * sizeof(size_t)) == 0 && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) - && equal_lookahead_tags(x, y, tagpool, tags); + && equal_lookahead_tags(x, y, ctx); } -bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) +bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) { // check that kernel sizes, NFA states lookahead tags // and precedence table coincide (versions might differ) @@ -269,26 +278,28 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) const bool compatible = n == y->size && memcmp(x->state, y->state, n * sizeof(void*)) == 0 && (!x->prectbl || memcmp(x->prectbl, y->prectbl, n * n * sizeof(prectable_t)) == 0) - && equal_lookahead_tags(x, y, tagpool, tags); + && equal_lookahead_tags(x, y, ctx); if (!compatible) return false; - tagver_t *x2y = buffers.x2y, *y2x = buffers.y2x, max = buffers.max; - size_t *x2t = buffers.x2t; + const std::vector &tags = ctx.dc_dfa.tags; + const size_t ntag = tags.size(); + kernel_buffers_t &bufs = ctx.dc_buffers; + tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max; + size_t *x2t = bufs.x2t; // map tag versions of one kernel to that of another // and check that lookahead versions (if any) coincide - const size_t ntag = tagpool.ntags; std::fill(x2y - max, x2y + max, TAGVER_ZERO); std::fill(y2x - max, y2x + max, TAGVER_ZERO); for (size_t i = 0; i < n; ++i) { const tagver_t - *xvs = tagpool[x->tvers[i]], - *yvs = tagpool[y->tvers[i]]; + *xvs = ctx.dc_tagpool[x->tvers[i]], + *yvs = ctx.dc_tagpool[y->tvers[i]]; const hidx_t xl = x->tlook[i]; for (size_t t = 0; t < ntag; ++t) { // see note [mapping ignores items with lookahead tags] - if (tagpool.history.last(xl, t) != TAGVER_ZERO + if (ctx.dc_tagtrie.last(xl, t) != TAGVER_ZERO && !history(tags[t])) continue; const tagver_t xv = xvs[t], yv = yvs[t]; @@ -305,7 +316,8 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) } // we have bijective mapping; now try to create list of commands - tcmd_t *b1 = buffers.backup_actions, *b2 = b1, *a, **pa, *copy = NULL; + tcmd_t **pacts = &ctx.dc_actions, *a, **pa, *copy = NULL; + tcmd_t *b1 = bufs.backup_actions, *b2 = b1; // backup 'save' commands: if topsort finds cycles, this mapping // will be rejected and we'll have to revert all changes @@ -328,7 +340,7 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv); if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) { assert(axv != ayv); - copy = tcpool.make_copy(copy, axv, ayv); + copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv); } } @@ -338,7 +350,7 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) *pacts = copy; // see note [topological ordering of copy commands] - const bool nontrivial_cycles = tcmd_t::topsort(pacts, buffers.indegree); + const bool nontrivial_cycles = tcmd_t::topsort(pacts, bufs.indegree); // in case of cycles restore 'save' commands and fail if (nontrivial_cycles) { @@ -351,59 +363,59 @@ bool kernels_t::operator()(const kernel_t *x, const kernel_t *y) } -size_t kernels_t::insert(const closure_t &closure, tagver_t maxver, - const prectable_t *prectbl, tcmd_t *&acts, bool &is_new) +bool do_find_state(determ_context_t &ctx) { - const size_t nkern = closure.size(); - size_t x = dfa_t::NIL; - is_new = false; + kernels_t &kernels = ctx.dc_kernels; + const closure_t &closure = ctx.dc_closure; // empty closure corresponds to default state - if (nkern == 0) { - acts = NULL; - return x; + if (closure.size() == 0) { + ctx.dc_target = dfa_t::NIL; + ctx.dc_actions = NULL; + return false; } // resize buffer if closure is too large - reserve_buffers(buffers, tagpool.alc, maxver, nkern); - kernel_t *k = buffers.kernel; + reserve_buffers(ctx); + kernel_t *k = ctx.dc_buffers.kernel; // copy closure to buffer kernel - copy_to_buffer_kernel(closure, prectbl, k); + copy_to_buffer_kernel(closure, ctx.dc_prectbl, k); // hash "static" part of the kernel const uint32_t hash = hash_kernel(k); // try to find identical kernel - kernel_eq_t cmp_eq = {tagpool, tags}; - x = lookup.find_with(hash, k, cmp_eq); - if (x != index_t::NIL) return x; + kernel_eq_t cmp_eq = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_eq); + if (ctx.dc_target != kernels_t::NIL) return false; // else try to find mappable kernel // see note [bijective mappings] - this->pacts = &acts; - x = lookup.find_with(hash, k, *this); - if (x != index_t::NIL) return x; + kernel_map_t cmp_map = {ctx}; + ctx.dc_target = kernels.find_with(hash, k, cmp_map); + if (ctx.dc_target != kernels_t::NIL) return false; // otherwise add new kernel - x = lookup.push(hash, make_kernel_copy(k, tagpool.alc)); - is_new = true; - return x; + kernel_t *kcopy = make_kernel_copy(k, ctx.dc_allocator); + ctx.dc_target = kernels.push(hash, kcopy); + return true; } -static tcmd_t *finalizer(const clos_t &clos, size_t ridx, - dfa_t &dfa, const Tagpool &tagpool, const std::vector &tags) +tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) { + dfa_t &dfa = ctx.dc_dfa; + const Rule &rule = dfa.rules[fin.state->rule]; + const tagver_t *vers = ctx.dc_tagpool[fin.tvers]; + const hidx_t look = fin.tlook; + const tagtree_t &hist = ctx.dc_tagtrie; tcpool_t &tcpool = dfa.tcpool; - const Rule &rule = dfa.rules[ridx]; - const tagver_t *vers = tagpool[clos.tvers]; - const tagtree_t &hist = tagpool.history; - const hidx_t look = clos.tlook; tcmd_t *copy = NULL, *save = NULL, **p; for (size_t t = rule.ltag; t < rule.htag; ++t) { - const Tag &tag = tags[t]; + + const Tag &tag = dfa.tags[t]; if (fixed(tag)) continue; const tagver_t v = abs(vers[t]), l = hist.last(look, t); @@ -425,11 +437,12 @@ static tcmd_t *finalizer(const clos_t &clos, size_t ridx, } -void find_state(dfa_t &dfa, size_t origin, size_t symbol, kernels_t &kernels, - const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const prectable_t *prectbl) +void find_state(determ_context_t &ctx) { - bool is_new; - const size_t state = kernels.insert(closure, dfa.maxtagver, prectbl, acts, is_new); + dfa_t &dfa = ctx.dc_dfa; + + // find or add the new state in the existing set of states + const bool is_new = do_find_state(ctx); if (is_new) { // create new DFA state @@ -438,25 +451,27 @@ void find_state(dfa_t &dfa, size_t origin, size_t symbol, kernels_t &kernels, // check if the new state is final // see note [at most one final item per closure] - cclositer_t c1 = closure.begin(), c2 = closure.end(), - c = std::find_if(c1, c2, clos_t::fin); - if (c != c2) { - t->rule = c->state->rule; - t->tcmd[dfa.nchars] = finalizer(*c, t->rule, dfa, - kernels.tagpool, kernels.tags); - dump.final(state, c->state); + cclositer_t + b = ctx.dc_closure.begin(), + e = ctx.dc_closure.end(), + f = std::find_if(b, e, clos_t::fin); + if (f != e) { + t->tcmd[dfa.nchars] = final_actions(ctx, *f); + t->rule = f->state->rule; } } - if (origin == dfa_t::NIL) { // initial state - dfa.tcmd0 = acts; - dump.state0(closure); - } else { - dfa_state_t *s = dfa.states[origin]; - s->arcs[symbol] = state; - s->tcmd[symbol] = acts; - dump.state(closure, origin, symbol, is_new); + if (ctx.dc_origin == dfa_t::NIL) { + // initial state + dfa.tcmd0 = ctx.dc_actions; } + else { + dfa_state_t *s = dfa.states[ctx.dc_origin]; + s->arcs[ctx.dc_symbol] = ctx.dc_target; + s->tcmd[ctx.dc_symbol] = ctx.dc_actions; + } + + ctx.dc_dump.state(ctx, is_new); } } // namespace re2c diff --git a/re2c/src/dfa/find_state.h b/re2c/src/dfa/find_state.h index 913088f1..5f65d19b 100644 --- a/re2c/src/dfa/find_state.h +++ b/re2c/src/dfa/find_state.h @@ -49,32 +49,7 @@ struct kernel_buffers_t explicit kernel_buffers_t(allocator_t &alc); }; -struct kernels_t -{ - typedef lookup_t index_t; - index_t lookup; - -public: - Tagpool &tagpool; - tcpool_t &tcpool; - const std::vector &tags; - -private: - kernel_buffers_t buffers; - - tcmd_t **pacts; - -public: - kernels_t(Tagpool &tagp, tcpool_t &tcp, const std::vector &ts); - inline size_t size() const { return lookup.size(); } - inline const kernel_t* operator[](size_t idx) const { return lookup[idx]; } - size_t insert(const closure_t &clos, tagver_t maxver, const prectable_t *prectbl, tcmd_t *&acts, bool &is_new); - bool operator()(const kernel_t *k1, const kernel_t *k2); - FORBID_COPY(kernels_t); -}; - -void find_state(dfa_t &dfa, size_t state, size_t symbol, kernels_t &kernels, - const closure_t &closure, tcmd_t *acts, dump_dfa_t &dump, const prectable_t *prectbl); +typedef lookup_t kernels_t; } // namespace re2c diff --git a/re2c/src/dfa/tagtree.cc b/re2c/src/dfa/tagtree.cc index bf056f98..07ec71e8 100644 --- a/re2c/src/dfa/tagtree.cc +++ b/re2c/src/dfa/tagtree.cc @@ -2,6 +2,7 @@ #include #include "src/dfa/closure.h" +#include "src/dfa/determinization.h" #include "src/dfa/tagtree.h" namespace re2c @@ -61,7 +62,7 @@ int32_t tagtree_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const static void reconstruct_history(const tagtree_t &history, - std::vector &path, hidx_t idx) + tag_path_t &path, hidx_t idx) { path.clear(); for (; idx != HROOT; idx = history.pred(idx)) { @@ -84,9 +85,8 @@ static inline int32_t unpack_leftmost(int32_t value) } -int32_t tagtree_t::precedence(const clos_t &x, const clos_t &y, - int32_t &rhox, int32_t &rhoy, const prectable_t *prectbl, - const std::vector &tags, size_t nclos) +int32_t precedence(determ_context_t &ctx, + const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy) { const hidx_t xl = x.tlook, yl = y.tlook; const uint32_t xo = x.origin, yo = y.origin; @@ -96,23 +96,36 @@ int32_t tagtree_t::precedence(const clos_t &x, const clos_t &y, return 0; } - reconstruct_history(*this, path1, xl); - reconstruct_history(*this, path2, yl); - std::vector::const_reverse_iterator - i1 = path1.rbegin(), e1 = path1.rend(), j1 = i1, g1, - i2 = path2.rbegin(), e2 = path2.rend(), j2 = i2, g2; + tagtree_t &trie = ctx.dc_tagtrie; + tag_path_t &p1 = trie.path1, &p2 = trie.path2; + reconstruct_history(trie, p1, xl); + reconstruct_history(trie, p2, yl); + tag_path_t::const_reverse_iterator + i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1, + i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2; + + const std::vector &tags = ctx.dc_dfa.tags; + size_t nclos = 0; + const prectable_t *prectbl = NULL; const bool fork_frame = xo == yo; - // find fork if (fork_frame) { + // find fork for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2); } + else { + // get precedence table and size of the origin state + const kernel_t *k = ctx.dc_kernels[ctx.dc_origin]; + nclos = k->size; + prectbl = k->prectbl; + } // longest precedence if (!fork_frame) { rhox = unpack_longest(prectbl[xo * nclos + yo]); rhoy = unpack_longest(prectbl[yo * nclos + xo]); - } else { + } + else { rhox = rhoy = std::numeric_limits::max(); if (j1 > i1) rhox = rhoy = tags[(j1 - 1)->idx].height; } diff --git a/re2c/src/dfa/tagtree.h b/re2c/src/dfa/tagtree.h index 921717d2..f13bd120 100644 --- a/re2c/src/dfa/tagtree.h +++ b/re2c/src/dfa/tagtree.h @@ -18,6 +18,8 @@ struct clos_t; static const hidx_t HROOT = ~0u; +typedef std::vector tag_path_t; + struct tagtree_t { // the whole tree of tags found by the epsilon-closure @@ -29,8 +31,8 @@ struct tagtree_t std::vector nodes; // reconstruct paths for comparison - std::vector path1; - std::vector path2; + tag_path_t path1; + tag_path_t path2; tagtree_t(); hidx_t pred(hidx_t i) const; @@ -40,8 +42,6 @@ struct tagtree_t hidx_t push(hidx_t i, tag_info_t info); tagver_t last(hidx_t i, size_t t) const; int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const; - int32_t precedence(const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy, - const prectable_t *prectbl, const std::vector &tags, size_t nclos); FORBID_COPY(tagtree_t); };