From: Ulya Trofimovich Date: Fri, 8 Mar 2019 07:21:49 +0000 (+0000) Subject: Use state index in closure instead of core index, and don't keep core indices at... X-Git-Tag: 1.2~97 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f8aad4ebd098c31aca82d32f9115facd7d56433e;p=re2c Use state index in closure instead of core index, and don't keep core indices at all. --- diff --git a/lib/regexec_nfa_leftmost.cc b/lib/regexec_nfa_leftmost.cc index d261cdfd..b8a05540 100644 --- a/lib/regexec_nfa_leftmost.cc +++ b/lib/regexec_nfa_leftmost.cc @@ -12,7 +12,7 @@ namespace re2c { namespace libre2c { static void reach_on_symbol(lsimctx_t &, uint32_t); -static void update_offsets(lsimctx_t &ctx, const conf_t &c); +static void update_offsets(lsimctx_t &ctx, const conf_t &c, uint32_t id); int regexec_nfa_leftmost(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -29,7 +29,6 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string const uint32_t sym = static_cast(*ctx.cursor++); if (ctx.state.empty() || sym == 0) break; reach_on_symbol(ctx, sym); - ++ctx.step; closure_leftmost_dfs(ctx); } @@ -37,7 +36,7 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string nfa_state_t *s = i->state; s->clos = NOCLOS; if (s->type == nfa_state_t::FIN) { - update_offsets(ctx, *i); + update_offsets(ctx, *i, NONCORE); } } @@ -70,6 +69,7 @@ void reach_on_symbol(lsimctx_t &ctx, uint32_t sym) DASSERT(reach.empty()); // in reverse, so that future closure DFS has states in stack order + uint32_t j = 0; for (rcconfiter_t i = state.rbegin(), e = state.rend(); i != e; ++i) { nfa_state_t *s = i->state; s->clos = NOCLOS; @@ -77,23 +77,25 @@ void reach_on_symbol(lsimctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - conf_t c(s->ran.out, s->coreid, HROOT); + conf_t c(s->ran.out, j, HROOT); reach.push_back(c); - update_offsets(ctx, *i); + update_offsets(ctx, *i, j); + ++j; break; } } } else if (s->type == nfa_state_t::FIN) { - update_offsets(ctx, *i); + update_offsets(ctx, *i, NONCORE); } } std::swap(ctx.offsets1, ctx.offsets2); ctx.history.init(); + ++ctx.step; } -void update_offsets(lsimctx_t &ctx, const conf_t &c) +void update_offsets(lsimctx_t &ctx, const conf_t &c, uint32_t id) { const size_t nsub = ctx.nsub; bool *done = ctx.done; @@ -106,7 +108,7 @@ void update_offsets(lsimctx_t &ctx, const conf_t &c) o = ctx.offsets3; } else { - o = ctx.offsets1 + s->coreid * nsub; + o = ctx.offsets1 + id * nsub; } memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); diff --git a/lib/regexec_nfa_leftmost_trie.cc b/lib/regexec_nfa_leftmost_trie.cc index 14224405..52aac9bb 100644 --- a/lib/regexec_nfa_leftmost_trie.cc +++ b/lib/regexec_nfa_leftmost_trie.cc @@ -20,8 +20,7 @@ int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string lzsimctx_t &ctx = *static_cast(preg->simctx); init(ctx, string); - nfa_state_t *s0 = ctx.nfa.root; - const conf_t c0(s0, s0->coreid, HROOT); + const conf_t c0(ctx.nfa.root, 0/* unused */, HROOT); ctx.reach.push_back(c0); closure_leftmost_dfs(ctx); for (;;) { @@ -53,7 +52,7 @@ void make_step(lzsimctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - const conf_t c(s->ran.out, 0, i->thist); + const conf_t c(s->ran.out, 0/* unused */, i->thist); reach.push_back(c); break; } diff --git a/lib/regexec_nfa_posix_trie.cc b/lib/regexec_nfa_posix_trie.cc index 577ec10b..76a045dc 100644 --- a/lib/regexec_nfa_posix_trie.cc +++ b/lib/regexec_nfa_posix_trie.cc @@ -21,8 +21,7 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string pzsimctx_t &ctx = *static_cast(preg->simctx); init(ctx, string); - nfa_state_t *s0 = ctx.nfa.root; - const conf_t c0(s0, s0->coreid, HROOT); + const conf_t c0(ctx.nfa.root, 0, HROOT); ctx.reach.push_back(c0); closure_posix_gtop(ctx); for (;;) { @@ -43,6 +42,7 @@ void make_step(pzsimctx_t &ctx, uint32_t sym) cconfiter_t b = state.begin(), e = state.end(), i; reach.clear(); + uint32_t j = 0; for (i = b; i != e; ++i) { nfa_state_t *s = i->state; @@ -52,7 +52,7 @@ void make_step(pzsimctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - const conf_t c(s->ran.out, s->coreid, i->thist); + const conf_t c(s->ran.out, j++, i->thist); reach.push_back(c); break; } diff --git a/src/nfa/nfa.h b/src/nfa/nfa.h index 0d66387b..b06a6f27 100644 --- a/src/nfa/nfa.h +++ b/src/nfa/nfa.h @@ -56,7 +56,6 @@ struct nfa_state_t uint32_t active : 1; // boolean uint32_t indeg : 27; // the rest; we are unlikely to have more than 2^27 states uint32_t topord; // state index in fake topological ordering - uint32_t coreid; // core state index void init(size_t r) { @@ -67,7 +66,6 @@ struct nfa_state_t active = 0; indeg = 0; topord = 0; - coreid = 0; } void make_alt(size_t r, nfa_state_t *s1, nfa_state_t *s2) diff --git a/src/nfa/re_to_nfa.cc b/src/nfa/re_to_nfa.cc index 13ccf715..cc84d6d5 100644 --- a/src/nfa/re_to_nfa.cc +++ b/src/nfa/re_to_nfa.cc @@ -9,11 +9,6 @@ namespace re2c { -static void calc_indegrees(nfa_state_t *); -static void calc_topord(nfa_state_t *, uint32_t &); -static void calc_coreid(nfa_state_t *, uint32_t &); - - /* * note [counted repetition and iteration expansion] * @@ -41,6 +36,8 @@ struct rtn_ctx_t size_t nrule; }; +static void stats(nfa_state_t *n, uint32_t &topord, uint32_t &ncores); + static nfa_state_t *re_to_nfa(rtn_ctx_t &ctx, const RE *re, nfa_state_t *t) { nfa_t &nfa = ctx.nfa; @@ -142,13 +139,10 @@ nfa_t::nfa_t(const RESpec &spec) } } - if (spec.opts->posix_semantics) { - // needed for closure algorithms GOR1 and GTOP - uint32_t topord = 0; - calc_topord(root, topord); - calc_indegrees(root); - } - calc_coreid(root, ncores); + // in-degree and topological index are used by POSIX disambiguation; + // the number of core states is used for both POSIX and leftmost + uint32_t topord = 0; + stats(root, topord, ncores); } nfa_t::~nfa_t() @@ -156,79 +150,32 @@ nfa_t::~nfa_t() delete[] states; } -void calc_indegrees(nfa_state_t *n) +void stats(nfa_state_t *n, uint32_t &topord, uint32_t &ncores) { ++n->indeg; if (n->indeg > 1) return; switch (n->type) { case nfa_state_t::NIL: - calc_indegrees(n->nil.out); + stats(n->nil.out, topord, ncores); break; case nfa_state_t::ALT: - calc_indegrees(n->alt.out1); - calc_indegrees(n->alt.out2); + stats(n->alt.out1, topord, ncores); + stats(n->alt.out2, topord, ncores); break; case nfa_state_t::TAG: - calc_indegrees(n->tag.out); + stats(n->tag.out, topord, ncores); break; case nfa_state_t::RAN: - calc_indegrees(n->ran.out); - case nfa_state_t::FIN: - break; - } -} - -void calc_topord(nfa_state_t *n, uint32_t &topord) -{ - if (n->topord != 0) return; - n->topord = ~0u; // temporary "visited" marker - - switch (n->type) { - case nfa_state_t::NIL: - calc_topord(n->nil.out, topord); - break; - case nfa_state_t::ALT: - calc_topord(n->alt.out1, topord); - calc_topord(n->alt.out2, topord); - break; - case nfa_state_t::TAG: - calc_topord(n->tag.out, topord); - break; - case nfa_state_t::RAN: - calc_topord(n->ran.out, topord); + ncores++; + stats(n->ran.out, topord, ncores); break; case nfa_state_t::FIN: + ncores++; break; } n->topord = topord++; } -void calc_coreid(nfa_state_t *n, uint32_t &coreid) -{ - if (n->coreid != 0) return; - n->coreid = NONCORE; - - switch (n->type) { - case nfa_state_t::NIL: - calc_coreid(n->nil.out, coreid); - break; - case nfa_state_t::ALT: - calc_coreid(n->alt.out1, coreid); - calc_coreid(n->alt.out2, coreid); - break; - case nfa_state_t::TAG: - calc_coreid(n->tag.out, coreid); - break; - case nfa_state_t::RAN: - n->coreid = coreid++; - calc_coreid(n->ran.out, coreid); - break; - case nfa_state_t::FIN: - n->coreid = coreid++; - break; - } -} - } // namespace re2c