From c2878dd100e92abe684ee38f558b17b453ed1b5a Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 2 Mar 2019 09:10:51 +0000 Subject: [PATCH] Deduplicated POSIX closure computation in re2c and libre2c. --- re2c/Makefile.am | 2 +- re2c/Makefile.lib.am | 2 +- re2c/lib/regex_impl.h | 10 +- re2c/lib/regexec.cc | 4 +- re2c/lib/regexec_nfa_posix.cc | 243 ++---------------- re2c/src/dfa/closure.cc | 1 + re2c/src/dfa/closure_leftmost.cc | 4 +- .../dfa/{closure_posix.cc => closure_posix.h} | 134 +++++----- re2c/src/dfa/determinization.cc | 34 +-- re2c/src/dfa/determinization.h | 33 ++- 10 files changed, 141 insertions(+), 326 deletions(-) rename re2c/src/dfa/{closure_posix.cc => closure_posix.h} (67%) diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 46f6d1f4..d1d92938 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -23,6 +23,7 @@ re2c_HDR = \ src/adfa/action.h \ src/adfa/adfa.h \ src/cfg/cfg.h \ + src/dfa/closure_posix.h \ src/dfa/determinization.h \ src/dfa/dfa.h \ src/dfa/posix_precedence.h \ @@ -110,7 +111,6 @@ re2c_SRC = \ src/cfg/varalloc.cc \ src/dfa/closure.cc \ src/dfa/closure_leftmost.cc \ - src/dfa/closure_posix.cc \ src/dfa/dead_rules.cc \ src/dfa/determinization.cc \ src/dfa/fallback_tags.cc \ diff --git a/re2c/Makefile.lib.am b/re2c/Makefile.lib.am index 5501bb11..4f44f706 100644 --- a/re2c/Makefile.lib.am +++ b/re2c/Makefile.lib.am @@ -20,6 +20,7 @@ libre2c_la_HDR = \ src/adfa/action.h \ src/adfa/adfa.h \ src/cfg/cfg.h \ + src/dfa/closure_posix.h \ src/dfa/determinization.h \ src/dfa/dfa.h \ src/dfa/posix_precedence.h \ @@ -97,7 +98,6 @@ libre2c_la_SRC = \ src/cfg/varalloc.cc \ src/dfa/closure.cc \ src/dfa/closure_leftmost.cc \ - src/dfa/closure_posix.cc \ src/debug/dump_adfa.cc \ src/debug/dump_cfg.cc \ src/debug/dump_dfa.cc \ diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index 8295eb31..9d23b6cd 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -25,6 +25,10 @@ struct conf_t inline conf_t(): state(NULL), origin(0), thist(HROOT) {} inline conf_t(nfa_state_t *s, uint32_t o, int32_t h) : state(s), origin(o), thist(h) {} + inline conf_t(const conf_t &c, nfa_state_t *s) + : state(s), origin(c.origin), thist(c.thist) {} + inline conf_t(const conf_t &c, nfa_state_t *s, int32_t h) + : state(s), origin(c.origin), thist(h) {} }; struct ran_or_fin_t @@ -48,6 +52,7 @@ typedef confset_t::const_reverse_iterator rcconfiter_t; struct simctx_t { + typedef libre2c::conf_t conf_t; typedef std::vector confset_t; typedef confset_t::iterator confiter_t; typedef confset_t::const_iterator cconfiter_t; @@ -58,9 +63,6 @@ struct simctx_t const size_t nsub; const int flags; - confset_t reach; - confset_t state; - tag_history_t history; int32_t hidx; @@ -85,6 +87,8 @@ struct simctx_t std::vector worklist; cache_t cache; + confset_t reach; + confset_t state; std::vector gor1_topsort; std::vector gor1_linear; std::vector gtop_heap_storage; diff --git a/re2c/lib/regexec.cc b/re2c/lib/regexec.cc index 4ecc7338..15fd75d3 100644 --- a/re2c/lib/regexec.cc +++ b/re2c/lib/regexec.cc @@ -71,8 +71,6 @@ simctx_t::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags) : nfa(nfa) , nsub(2 * (re_nsub - 1)) , flags(flags) - , reach() - , state() , history() , hidx(HROOT) , step(0) @@ -91,6 +89,8 @@ simctx_t::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags) , fincount() , worklist() , cache() + , reach() + , state() , gor1_topsort() , gor1_linear() , gtop_heap_storage() diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index a6f2780d..c4e4fd7e 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -6,6 +6,7 @@ #include "lib/regex_impl.h" #include "src/options/opt.h" #include "src/debug/debug.h" +#include "src/dfa/closure_posix.h" #include "src/dfa/determinization.h" #include "src/dfa/posix_precedence.h" #include "src/nfa/nfa.h" @@ -14,39 +15,16 @@ namespace re2c { namespace libre2c { -struct cmp_gor1_t -{ - simctx_t &ctx; - inline cmp_gor1_t(simctx_t &c) : ctx(c) {} - - bool operator()(const conf_t &x, const conf_t &y) const; -}; - -enum sssp_alg_t {GOR1, GTOP}; - -template static int do_regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int); -template static void closure_posix(simctx_t &ctx); static void make_one_step(simctx_t &, uint32_t); static void make_final_step(simctx_t &); static void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id); static void compute_prectbl_naive(simctx_t &ctx); // we *do* want these to be inlined -static inline bool scan(simctx_t &ctx, nfa_state_t *q, bool all); -static inline bool relax_gor1(simctx_t &, const conf_t &); -static inline void relax_gtop(simctx_t &, const conf_t &); +static inline void closure_posix(simctx_t &ctx); int regexec_nfa_posix(const regex_t *preg, const char *string - , size_t nmatch, regmatch_t pmatch[], int eflags) -{ - return preg->flags & REG_GTOP - ? do_regexec(preg, string, nmatch, pmatch, eflags) - : do_regexec(preg, string, nmatch, pmatch, eflags); -} - -template -int do_regexec(const regex_t *preg, const char *string - , size_t nmatch, regmatch_t pmatch[], int) + , size_t nmatch, regmatch_t pmatch[], int /* eflags */) { simctx_t &ctx = *preg->simctx; init(ctx, string); @@ -54,12 +32,12 @@ int do_regexec(const regex_t *preg, const char *string // root state can be non-core, so we pass zero as origin to avoid checks const conf_t c0(ctx.nfa.root, 0, HROOT); ctx.reach.push_back(c0); - closure_posix(ctx); + closure_posix(ctx); for (;;) { const uint32_t sym = static_cast(*ctx.cursor++); if (ctx.state.empty() || sym == 0) break; make_one_step(ctx, sym); - closure_posix(ctx); + closure_posix(ctx); } make_final_step(ctx); @@ -84,6 +62,18 @@ int do_regexec(const regex_t *preg, const char *string return 0; } +void closure_posix(simctx_t &ctx) +{ + ctx.history.detach(); + + if (ctx.flags & REG_GTOP) { + closure_posix_gtop(ctx); + } + else { + closure_posix_gor1(ctx); + } +} + void make_one_step(simctx_t &ctx, uint32_t sym) { confset_t &state = ctx.state, &reach = ctx.reach; @@ -145,205 +135,6 @@ void make_final_step(simctx_t &ctx) } } -template<> -void closure_posix(simctx_t &ctx) -{ - confset_t &state = ctx.state, &reach = ctx.reach; - std::vector - &topsort = ctx.gor1_topsort, - &linear = ctx.gor1_linear; - - // init: push configurations ordered by POSIX precedence (highest on top) - state.clear(); - std::sort(reach.begin(), reach.end(), cmp_gor1_t(ctx)); - for (rcconfiter_t c = reach.rbegin(); c != reach.rend(); ++c) { - nfa_state_t *q = c->state; - if (q->clos == NOCLOS) { - q->clos = static_cast(state.size()); - state.push_back(*c); - } - else { - state[q->clos] = *c; - } - topsort.push_back(q); - } - - for (; !topsort.empty(); ) { - - // 1st pass: depth-first postorder traversal of admissible subgraph - for (; !topsort.empty(); ) { - nfa_state_t *q = topsort.back(); - if (q->status == GOR_LINEAR) { - topsort.pop_back(); - } - else { - q->status = GOR_TOPSORT; - if (!scan(ctx, q, false)) { - q->status = GOR_LINEAR; - topsort.pop_back(); - linear.push_back(q); - } - } - } - - // 2nd pass: linear scan of topologically ordered states - for (; !linear.empty(); ) { - nfa_state_t *q = linear.back(); - linear.pop_back(); - if (q->active) { - q->active = 0; - q->arcidx = 0; - scan(ctx, q, true); - } - q->status = GOR_NOPASS; - } - } -} - -inline bool cmp_gor1_t::operator()(const conf_t &x, const conf_t &y) const -{ - const uint32_t xo = x.origin, yo = y.origin; - return xo != yo - && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0; -} - -bool scan(simctx_t &ctx, nfa_state_t *q, bool all) -{ - bool any = false; - - const conf_t &x = ctx.state[q->clos]; - const uint32_t o = x.origin; - const int32_t h = x.thist; - - switch (q->type) { - case nfa_state_t::NIL: - if (q->arcidx == 0) { - any |= relax_gor1(ctx, conf_t(q->nil.out, o, h)); - ++q->arcidx; - } - break; - case nfa_state_t::ALT: - if (q->arcidx == 0) { - any |= relax_gor1(ctx, conf_t(q->alt.out1, o, h)); - ++q->arcidx; - } - if (q->arcidx == 1 && (!any || all)) { - any |= relax_gor1(ctx, conf_t(q->alt.out2, o, h)); - ++q->arcidx; - } - break; - case nfa_state_t::TAG: - if (q->arcidx == 0) { - any |= relax_gor1(ctx, conf_t(q->tag.out, o - , ctx.history.push1(h, q->tag.info))); - ++q->arcidx; - } - break; - default: - break; - } - - return any; -} - -bool relax_gor1(simctx_t &ctx, const conf_t &x) -{ - confset_t &state = ctx.state; - nfa_state_t *q = x.state; - const uint32_t idx = q->clos; - int32_t p1, p2; - - if (q->status == GOR_TOPSORT) { - return false; - } - - if (idx == NOCLOS) { - q->clos = static_cast(state.size()); - state.push_back(x); - } - else if (q->indeg < 2 - || precedence(ctx, x, state[idx], p1, p2) < 0) { - state[idx] = x; - } - else { - return false; - } - - if (q->status == GOR_NOPASS) { - ctx.gor1_topsort.push_back(q); - q->arcidx = 0; - return true; - } - else { - q->active = 1; - return false; - } -} - -template<> -void closure_posix(simctx_t &ctx) -{ - const confset_t &reach = ctx.reach; - confset_t &state = ctx.state; - gtop_heap_t &heap = ctx.gtop_heap; - - state.clear(); - for (cconfiter_t c = reach.begin(); c != reach.end(); ++c) { - relax_gtop(ctx, *c); - } - - for (; !heap.empty(); ) { - nfa_state_t *q = heap.top(); - heap.pop(); - q->active = 0; - - const conf_t &x = state[q->clos]; - const uint32_t o = x.origin; - const int32_t h = x.thist; - - switch (q->type) { - case nfa_state_t::NIL: - relax_gtop(ctx, conf_t(q->nil.out, o, h)); - break; - case nfa_state_t::ALT: - relax_gtop(ctx, conf_t(q->alt.out1, o, h)); - relax_gtop(ctx, conf_t(q->alt.out2, o, h)); - break; - case nfa_state_t::TAG: - relax_gtop(ctx, conf_t(q->tag.out, o - , ctx.history.push1(h, q->tag.info))); - break; - default: - break; - } - } -} - -void relax_gtop(simctx_t &ctx, const conf_t &c) -{ - confset_t &state = ctx.state; - nfa_state_t *q = c.state; - const uint32_t idx = q->clos; - int32_t p1, p2; - - if (idx == NOCLOS) { - q->clos = static_cast(state.size()); - state.push_back(c); - } - else if (q->indeg < 2 - || precedence(ctx, c, state[idx], p1, p2) < 0) { - state[idx] = c; - } - else { - return; - } - - if (!q->active) { - q->active = 1; - ctx.gtop_heap.push(q); - } -} - void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id) { const size_t nsub = ctx.nsub; diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index c58c22e6..e6058246 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -10,6 +10,7 @@ #include "src/options/opt.h" #include "src/dfa/determinization.h" #include "src/dfa/dfa.h" +#include "src/dfa/closure_posix.h" #include "src/dfa/posix_precedence.h" #include "src/dfa/tcmd.h" #include "src/nfa/nfa.h" diff --git a/re2c/src/dfa/closure_leftmost.cc b/re2c/src/dfa/closure_leftmost.cc index 41825882..7b1ef1d9 100644 --- a/re2c/src/dfa/closure_leftmost.cc +++ b/re2c/src/dfa/closure_leftmost.cc @@ -7,9 +7,9 @@ namespace re2c void closure_leftmost(determ_context_t &ctx) { - const closure_t &init = ctx.dc_reached; + const closure_t &init = ctx.reach; closure_t &done = ctx.state; - std::stack &todo = ctx.dc_stack_dfs; + std::stack &todo = ctx.stack_dfs; // enqueue all initial states done.clear(); diff --git a/re2c/src/dfa/closure_posix.cc b/re2c/src/dfa/closure_posix.h similarity index 67% rename from re2c/src/dfa/closure_posix.cc rename to re2c/src/dfa/closure_posix.h index 81b7536e..9e59ef76 100644 --- a/re2c/src/dfa/closure_posix.cc +++ b/re2c/src/dfa/closure_posix.h @@ -1,3 +1,6 @@ +#ifndef _RE2C_DFA_CLOSURE_POSIX_ +#define _RE2C_DFA_CLOSURE_POSIX_ + #include #include "src/dfa/determinization.h" @@ -16,24 +19,15 @@ namespace re2c * can just propagate the new path up to the next join point. */ -struct cmp_gor1_t -{ - determ_context_t &ctx; - - cmp_gor1_t(determ_context_t &); - bool operator()(const clos_t &, const clos_t &) const; -}; - -static void closure_posix_gor1(determ_context_t &); -static void closure_posix_gtop(determ_context_t &); +template static void closure_posix_gor1(ctx_t &); +template static void closure_posix_gtop(ctx_t &); // we *do* want these to be inlined -static inline bool scan(determ_context_t &ctx, nfa_state_t *q, bool all); -static inline bool relax_gor1(determ_context_t &, const clos_t &); -static inline void relax_gtop(determ_context_t &, const clos_t &); +template static inline bool scan(ctx_t &ctx, nfa_state_t *q, bool all); +template static inline bool relax_gor1(ctx_t &, const typename ctx_t::conf_t &); +template static inline void relax_gtop(ctx_t &, const typename ctx_t::conf_t &); - -void closure_posix(determ_context_t &ctx) +inline void closure_posix(determ_context_t &ctx) { DRESET_CLSTATS(ctx); @@ -57,6 +51,24 @@ void closure_posix(determ_context_t &ctx) } } +template +struct cmp_gor1_t +{ + ctx_t &ctx; + + cmp_gor1_t(ctx_t &ctx) : ctx(ctx) {} + + bool operator() + ( const typename ctx_t::conf_t &x + , const typename ctx_t::conf_t &y) const + { + // if longest components differ, leftmost already incorporates that + const uint32_t xo = x.origin, yo = y.origin; + return xo != yo + && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0; + } +}; + /* * note [GOR1 SSSP algorithm] * Cherkassky-Goldberg-Radzik Single Source Shortest Path algorithm. @@ -70,17 +82,18 @@ void closure_posix(determ_context_t &ctx) * Complexity for digraph G = (V, E) is O(|V| * |E|), and O(|V| + |E|) * in case of acyclic graph. */ -void closure_posix_gor1(determ_context_t &ctx) +template +void closure_posix_gor1(ctx_t &ctx) { - closure_t &state = ctx.state, &reach = ctx.dc_reached; + typename ctx_t::confset_t &state = ctx.state, &reach = ctx.reach; std::vector - &topsort = ctx.dc_gor1_topsort, - &linear = ctx.dc_gor1_linear; + &topsort = ctx.gor1_topsort, + &linear = ctx.gor1_linear; // init: push configurations ordered by POSIX precedence (highest on top) state.clear(); - std::sort(reach.begin(), reach.end(), cmp_gor1_t(ctx)); - for (rcclositer_t c = reach.rbegin(); c != reach.rend(); ++c) { + std::sort(reach.begin(), reach.end(), cmp_gor1_t(ctx)); + for (typename ctx_t::rcconfiter_t c = reach.rbegin(); c != reach.rend(); ++c) { nfa_state_t *q = c->state; if (q->clos == NOCLOS) { q->clos = static_cast(state.size()); @@ -126,57 +139,49 @@ void closure_posix_gor1(determ_context_t &ctx) } } -inline cmp_gor1_t::cmp_gor1_t(determ_context_t &ctx) : ctx(ctx) {} - -inline bool cmp_gor1_t::operator()(const clos_t &x, const clos_t &y) const -{ - // if longest components differ, leftmost already incorporates that - const uint32_t xo = x.origin, yo = y.origin; - return xo != yo - && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0; -} - -bool scan(determ_context_t &ctx, nfa_state_t *q, bool all) +template +bool scan(ctx_t &ctx, nfa_state_t *q, bool all) { bool any = false; - clos_t x = ctx.state[q->clos]; + + typedef typename ctx_t::conf_t conf_t; + const conf_t &x = ctx.state[q->clos]; + switch (q->type) { case nfa_state_t::NIL: if (q->arcidx == 0) { - x.state = q->nil.out; - any |= relax_gor1(ctx, x); + any |= relax_gor1(ctx, conf_t(x, q->nil.out)); ++q->arcidx; } break; case nfa_state_t::ALT: if (q->arcidx == 0) { - x.state = q->alt.out1; - any |= relax_gor1(ctx, x); + any |= relax_gor1(ctx, conf_t(x, q->alt.out1)); ++q->arcidx; } if (q->arcidx == 1 && (!any || all)) { - x.state = q->alt.out2; - any |= relax_gor1(ctx, x); + any |= relax_gor1(ctx, conf_t(x, q->alt.out2)); ++q->arcidx; } break; case nfa_state_t::TAG: if (q->arcidx == 0) { - x.state = q->tag.out; - x.thist = ctx.history.push1(x.thist, q->tag.info); - any |= relax_gor1(ctx, x); + any |= relax_gor1(ctx, conf_t(x, q->tag.out + , ctx.history.push1(x.thist, q->tag.info))); ++q->arcidx; } break; default: break; } + return any; } -bool relax_gor1(determ_context_t &ctx, const clos_t &x) +template +bool relax_gor1(ctx_t &ctx, const typename ctx_t::conf_t &x) { - closure_t &state = ctx.state; + typename ctx_t::confset_t &state = ctx.state; nfa_state_t *q = x.state; const uint32_t idx = q->clos; int32_t p1, p2; @@ -198,7 +203,7 @@ bool relax_gor1(determ_context_t &ctx, const clos_t &x) } if (q->status == GOR_NOPASS) { - ctx.dc_gor1_topsort.push_back(q); + ctx.gor1_topsort.push_back(q); q->arcidx = 0; return true; } @@ -230,14 +235,15 @@ bool relax_gor1(determ_context_t &ctx, const clos_t &x) * However the algorithm is simple and optimal for DAGs, therefore we keep it. */ -void closure_posix_gtop(determ_context_t &ctx) +template +void closure_posix_gtop(ctx_t &ctx) { - const closure_t &reach = ctx.dc_reached; - closure_t &state = ctx.state; - gtop_heap_t &heap = ctx.dc_gtop_heap; + const typename ctx_t::confset_t &reach = ctx.reach; + typename ctx_t::confset_t &state = ctx.state; + gtop_heap_t &heap = ctx.gtop_heap; state.clear(); - for (cclositer_t c = reach.begin(); c != reach.end(); ++c) { + for (typename ctx_t::cconfiter_t c = reach.begin(); c != reach.end(); ++c) { relax_gtop(ctx, *c); } @@ -247,22 +253,20 @@ void closure_posix_gtop(determ_context_t &ctx) q->active = 0; DINCCOUNT_CLSCANS(ctx); - clos_t x = ctx.state[q->clos]; + typedef typename ctx_t::conf_t conf_t; + const conf_t &x = ctx.state[q->clos]; + switch (q->type) { case nfa_state_t::NIL: - x.state = q->nil.out; - relax_gtop(ctx, x); + relax_gtop(ctx, conf_t(x, q->nil.out)); break; case nfa_state_t::ALT: - x.state = q->alt.out1; - relax_gtop(ctx, x); - x.state = q->alt.out2; - relax_gtop(ctx, x); + relax_gtop(ctx, conf_t(x, q->alt.out1)); + relax_gtop(ctx, conf_t(x, q->alt.out2)); break; case nfa_state_t::TAG: - x.state = q->tag.out; - x.thist = ctx.history.push1(x.thist, q->tag.info); - relax_gtop(ctx, x); + relax_gtop(ctx, conf_t(x, q->tag.out + , ctx.history.push1(x.thist, q->tag.info))); break; default: break; @@ -270,9 +274,10 @@ void closure_posix_gtop(determ_context_t &ctx) } } -void relax_gtop(determ_context_t &ctx, const clos_t &c) +template +void relax_gtop(ctx_t &ctx, const typename ctx_t::conf_t &c) { - closure_t &state = ctx.state; + typename ctx_t::confset_t &state = ctx.state; nfa_state_t *q = c.state; const uint32_t idx = q->clos; int32_t p1, p2; @@ -291,8 +296,11 @@ void relax_gtop(determ_context_t &ctx, const clos_t &c) if (!q->active) { q->active = 1; - ctx.dc_gtop_heap.push(q); + ctx.gtop_heap.push(q); } } } // namespace re2c + +#endif // _RE2C_DFA_CLOSURE_POSIX_ + diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 98113389..5858d391 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -51,8 +51,8 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond const uint32_t INITIAL_TAGS = init_tag_versions(ctx); // initial state - const clos_t c0 = {nfa.root, 0, INITIAL_TAGS, HROOT, HROOT}; - ctx.dc_reached.push_back(c0); + const clos_t c0(nfa.root, 0, INITIAL_TAGS, HROOT, HROOT); + ctx.reach.push_back(c0); tagged_epsilon_closure(ctx); find_state(ctx); @@ -94,14 +94,14 @@ void reach_on_symbol(determ_context_t &ctx, uint32_t sym) ctx.oldprectbl = kernel->prectbl; ctx.oldprecdim = kernel->size; - closure_t &reached = ctx.dc_reached; - reached.clear(); + closure_t &reach = ctx.reach; + reach.clear(); for (uint32_t i = 0; i < kernel->size; ++i) { nfa_state_t *s = transition(kernel->state[i], symbol); if (s) { - clos_t c = {s, i, kernel->tvers[i], kernel->thist[i], HROOT}; - reached.push_back(c); + const clos_t c(s, i, kernel->tvers[i], kernel->thist[i], HROOT); + reach.push_back(c); } } } @@ -224,24 +224,24 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg , dc_target(dfa_t::NIL) , dc_symbol(0) , dc_actions(NULL) - , dc_reached() - , state() , dc_tagvertbl(nfa.tags.size()) , history() , dc_kernels() , dc_buffers(dc_allocator) - , dc_stack_dfs() - , dc_gor1_topsort() - , dc_gor1_linear() - , dc_gtop_buffer() - , dc_gtop_cmp() - , dc_gtop_heap(dc_gtop_cmp, dc_gtop_buffer) , dc_hc_caches() , dc_newvers(newver_cmp_t(history, dc_hc_caches)) , dc_path1() , dc_path2() , dc_path3() , dc_tagcount() + , reach() + , state() + , stack_dfs() + , gor1_topsort() + , gor1_linear() + , gtop_buffer() + , gtop_cmp() + , gtop_heap(gtop_cmp, gtop_buffer) , newprectbl(NULL) , oldprectbl(NULL) , oldprecdim(0) @@ -271,11 +271,11 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg } if (opts->posix_closure == POSIX_CLOSURE_GTOP) { - dc_gtop_buffer.reserve(nstates); + gtop_buffer.reserve(nstates); } else { - dc_gor1_topsort.reserve(nstates); - dc_gor1_linear.reserve(nstates); + gor1_topsort.reserve(nstates); + gor1_linear.reserve(nstates); } } diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 204336da..0c3f6d62 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -40,6 +40,14 @@ struct clos_t hidx_t ttran; // history of transition tags hidx_t thist; // history of lookahead tags + inline clos_t() + : state(NULL), origin(0), tvers(0), ttran(0), thist(HROOT) {} + inline clos_t(nfa_state_t *s, uint32_t o, uint32_t v, hidx_t t, hidx_t h) + : state(s), origin(o), tvers(v), ttran(t), thist(h) {} + inline clos_t(const clos_t &c, nfa_state_t *s) + : state(s), origin(c.origin), tvers(c.tvers), ttran(c.ttran), thist(c.thist) {} + inline clos_t(const clos_t &c, nfa_state_t *s, hidx_t h) + : state(s), origin(c.origin), tvers(c.tvers), ttran(c.ttran), thist(h) {} static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; } static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; } }; @@ -126,7 +134,8 @@ typedef std::priority_queue struct determ_context_t { - typedef std::vector confset_t; + typedef clos_t conf_t; + typedef std::vector confset_t; typedef confset_t::iterator confiter_t; typedef confset_t::const_iterator cconfiter_t; typedef confset_t::reverse_iterator rconfiter_t; @@ -147,18 +156,10 @@ struct determ_context_t uint32_t dc_target; // to-state of the current transition uint32_t dc_symbol; // alphabet symbol of the current transition tcmd_t *dc_actions; // tag actions of the current transition - closure_t dc_reached; - closure_t state; tagver_table_t dc_tagvertbl; tag_history_t history; // prefix trie of tag histories kernels_t dc_kernels; // TDFA states under construction kernel_buffers_t dc_buffers; - std::stack dc_stack_dfs; // stack used for DFS in leftmost greedy closure - std::vector dc_gor1_topsort; // stack used in GOR1 (POSIX closure) - std::vector dc_gor1_linear; // stack used in GOR1 (POSIX closure) - std::vector dc_gtop_buffer; // buffer used for heap in GTOP (POSIX closure) - cmp_gtop_t dc_gtop_cmp; // heap comparator used in GTOP (POSIX closure) - gtop_heap_t dc_gtop_heap; // heap used in GTOP (POSIX closure) hc_caches_t dc_hc_caches; // per-tag cache of history comparisons newvers_t dc_newvers; // map of triples (tag, version, history) to new version tag_path_t dc_path1; // buffer 1 for tag history @@ -166,6 +167,16 @@ struct determ_context_t tag_path_t dc_path3; // buffer 3 for tag history std::vector dc_tagcount; // buffer for counting sort on tag history + // tagged epsilon-closure + confset_t reach; + confset_t state; + std::stack stack_dfs; // stack used for DFS in leftmost greedy closure + std::vector gor1_topsort; // stack used in GOR1 (POSIX closure) + std::vector gor1_linear; // stack used in GOR1 (POSIX closure) + std::vector gtop_buffer; // buffer used for heap in GTOP (POSIX closure) + cmp_gtop_t gtop_cmp; // heap comparator used in GTOP (POSIX closure) + gtop_heap_t gtop_heap; // heap used in GTOP (POSIX closure) + // precedence table and auxilary data for POSIX disambiguation int32_t *newprectbl; const int32_t *oldprectbl; @@ -176,8 +187,8 @@ struct determ_context_t std::vector worklist; // debug - dump_dfa_t dc_dump; - closure_stats_t dc_clstats; + dump_dfa_t dc_dump; + closure_stats_t dc_clstats; determ_context_t(const opt_t *, Msg &, const std::string &, const nfa_t &, dfa_t &); ~determ_context_t(); -- 2.50.1