From: Ulya Trofimovich Date: Mon, 4 Mar 2019 17:14:49 +0000 (+0000) Subject: Deduplicated leftmost greedy closure implementations in re2c and libre2c. X-Git-Tag: 1.2~126 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=469fd0a7fb406afed00f60bf2a8db94c1cbcdaf8;p=re2c Deduplicated leftmost greedy closure implementations in re2c and libre2c. --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index d1d92938..9d11ad21 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -23,6 +23,7 @@ re2c_HDR = \ src/adfa/action.h \ src/adfa/adfa.h \ src/cfg/cfg.h \ + src/dfa/closure_leftmost.h \ src/dfa/closure_posix.h \ src/dfa/determinization.h \ src/dfa/dfa.h \ @@ -110,7 +111,6 @@ re2c_SRC = \ src/cfg/rename.cc \ src/cfg/varalloc.cc \ src/dfa/closure.cc \ - src/dfa/closure_leftmost.cc \ src/dfa/dead_rules.cc \ src/dfa/determinization.cc \ src/dfa/fallback_tags.cc \ diff --git a/re2c/Makefile.lib.am b/re2c/Makefile.lib.am index 4f44f706..69c9c436 100644 --- a/re2c/Makefile.lib.am +++ b/re2c/Makefile.lib.am @@ -20,6 +20,7 @@ libre2c_la_HDR = \ src/adfa/action.h \ src/adfa/adfa.h \ src/cfg/cfg.h \ + src/dfa/closure_leftmost.h \ src/dfa/closure_posix.h \ src/dfa/determinization.h \ src/dfa/dfa.h \ @@ -97,7 +98,6 @@ libre2c_la_SRC = \ src/cfg/rename.cc \ src/cfg/varalloc.cc \ src/dfa/closure.cc \ - src/dfa/closure_leftmost.cc \ src/debug/dump_adfa.cc \ src/debug/dump_cfg.cc \ src/debug/dump_dfa.cc \ diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index 19c49a7e..d261cdfd 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -3,6 +3,7 @@ #include "lib/regex_impl.h" #include "src/options/opt.h" #include "src/debug/debug.h" +#include "src/dfa/closure_leftmost.h" #include "src/dfa/determinization.h" #include "src/nfa/nfa.h" @@ -11,7 +12,6 @@ namespace re2c { namespace libre2c { static void reach_on_symbol(lsimctx_t &, uint32_t); -static void closure_leftmost(lsimctx_t &); static void update_offsets(lsimctx_t &ctx, const conf_t &c); int regexec_nfa_leftmost(const regex_t *preg, const char *string @@ -23,14 +23,14 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string // root state can be non-core, so we pass zero as origin to avoid checks const conf_t c0(ctx.nfa.root, 0, HROOT); ctx.reach.push_back(c0); - closure_leftmost(ctx); + closure_leftmost_dfs(ctx); for (;;) { const uint32_t sym = static_cast(*ctx.cursor++); if (ctx.state.empty() || sym == 0) break; reach_on_symbol(ctx, sym); ++ctx.step; - closure_leftmost(ctx); + closure_leftmost_dfs(ctx); } for (cconfiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) { @@ -93,40 +93,6 @@ void reach_on_symbol(lsimctx_t &ctx, uint32_t sym) ctx.history.init(); } -void closure_leftmost(lsimctx_t &ctx) -{ - confset_t &state = ctx.state, &wl = ctx.reach; - state.clear(); - for (; !wl.empty(); ) { - - const conf_t &x = wl.back(); - nfa_state_t *n = x.state; - const uint32_t o = x.origin; - const int32_t h = x.thist; - wl.pop_back(); - - if (n->clos != NOCLOS) continue; - - n->clos = 0; - state.push_back(x); - - switch (n->type) { - case nfa_state_t::NIL: - wl.push_back(conf_t(n->nil.out, o, h)); - break; - case nfa_state_t::ALT: - wl.push_back(conf_t(n->alt.out2, o, h)); - wl.push_back(conf_t(n->alt.out1, o, h)); - break; - case nfa_state_t::TAG: - wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x))); - break; - default: - break; - } - } -} - void update_offsets(lsimctx_t &ctx, const conf_t &c) { const size_t nsub = ctx.nsub; diff --git a/re2c/lib/regexec_nfa_leftmost_trie.cc b/re2c/lib/regexec_nfa_leftmost_trie.cc index 37e37fea..14224405 100644 --- a/re2c/lib/regexec_nfa_leftmost_trie.cc +++ b/re2c/lib/regexec_nfa_leftmost_trie.cc @@ -3,6 +3,7 @@ #include "lib/regex_impl.h" #include "src/options/opt.h" #include "src/debug/debug.h" +#include "src/dfa/closure_leftmost.h" #include "src/dfa/determinization.h" #include "src/nfa/nfa.h" @@ -10,8 +11,8 @@ namespace re2c { namespace libre2c { -static void reach_on_symbol(lzsimctx_t &, uint32_t); -static void closure_leftmost(lzsimctx_t &); +static void make_step(lzsimctx_t &, uint32_t); +static void make_final_step(lzsimctx_t &ctx); int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -22,26 +23,19 @@ int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string nfa_state_t *s0 = ctx.nfa.root; const conf_t c0(s0, s0->coreid, HROOT); ctx.reach.push_back(c0); - closure_leftmost(ctx); - + closure_leftmost_dfs(ctx); for (;;) { const uint32_t sym = static_cast(*ctx.cursor++); if (ctx.state.empty() || sym == 0) break; - reach_on_symbol(ctx, sym); - ++ctx.step; - closure_leftmost(ctx); - } - - const confset_t &state = ctx.state; - cconfiter_t b = state.begin(), e = state.end(), i; - for (i = b; i != e; ++i) { - i->state->clos = NOCLOS; + make_step(ctx, sym); + closure_leftmost_dfs(ctx); } + make_final_step(ctx); return finalize(ctx, string, nmatch, pmatch); } -void reach_on_symbol(lzsimctx_t &ctx, uint32_t sym) +void make_step(lzsimctx_t &ctx, uint32_t sym) { const confset_t &state = ctx.state; confset_t &reach = ctx.reach; @@ -65,44 +59,28 @@ void reach_on_symbol(lzsimctx_t &ctx, uint32_t sym) } } } + else if (s->type == nfa_state_t::FIN) { + ctx.marker = ctx.cursor; + ctx.hidx = i->thist; + ctx.rule = 0; + } } + + ++ctx.step; } -void closure_leftmost(lzsimctx_t &ctx) +void make_final_step(lzsimctx_t &ctx) { - confset_t &state = ctx.state, &wl = ctx.reach; - state.clear(); - for (; !wl.empty(); ) { - - const conf_t &x = wl.back(); - nfa_state_t *n = x.state; - const uint32_t o = x.origin; - const int32_t h = x.thist; - wl.pop_back(); - - if (n->clos != NOCLOS) continue; - - n->clos = 0; - state.push_back(x); - - switch (n->type) { - case nfa_state_t::NIL: - wl.push_back(conf_t(n->nil.out, o, h)); - break; - case nfa_state_t::ALT: - wl.push_back(conf_t(n->alt.out2, o, h)); - wl.push_back(conf_t(n->alt.out1, o, h)); - break; - case nfa_state_t::TAG: - wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x))); - break; - case nfa_state_t::RAN: - break; - case nfa_state_t::FIN: - ctx.marker = ctx.cursor + 1; - ctx.hidx = x.thist; - ctx.rule = 0; - break; + for (confiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) { + nfa_state_t *s = i->state; + + s->clos = NOCLOS; + DASSERT(s->active == 0); + + if (s->type == nfa_state_t::FIN) { + ctx.marker = ctx.cursor; + ctx.hidx = i->thist; + ctx.rule = 0; } } } diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 080b402d..577ec10b 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -1,6 +1,3 @@ -#include -#include - #include "lib/lex.h" #include "lib/regex.h" #include "lib/regex_impl.h" diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index 24a11b1e..d9f08aef 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -10,6 +10,7 @@ #include "src/options/opt.h" #include "src/dfa/determinization.h" #include "src/dfa/dfa.h" +#include "src/dfa/closure_leftmost.h" #include "src/dfa/closure_posix.h" #include "src/dfa/posix_precedence.h" #include "src/dfa/tcmd.h" diff --git a/re2c/src/dfa/closure_leftmost.cc b/re2c/src/dfa/closure_leftmost.cc deleted file mode 100644 index eb50becc..00000000 --- a/re2c/src/dfa/closure_leftmost.cc +++ /dev/null @@ -1,47 +0,0 @@ -#include "src/dfa/determinization.h" -#include "src/nfa/nfa.h" - - -namespace re2c -{ - -void closure_leftmost(ldetctx_t &ctx) -{ - closure_t &done = ctx.state, &todo = ctx.reach; - done.clear(); - - // DFS; linear complexity - for (; !todo.empty(); ) { - const clos_t &x = todo.back(); - nfa_state_t *n = x.state; - todo.pop_back(); - - if (n->clos != NOCLOS) continue; - - n->clos = static_cast(done.size()); - done.push_back(x); - - switch (n->type) { - case nfa_state_t::NIL: - todo.push_back(clos_t(x, n->nil.out)); - break; - case nfa_state_t::ALT: - todo.push_back(clos_t(x, n->alt.out2)); - todo.push_back(clos_t(x, n->alt.out1)); - break; - case nfa_state_t::TAG: - todo.push_back(clos_t(x, n->tag.out, ctx.history.link(ctx, x))); - break; - default: - break; - } - } - - // reset associated closure items - // (do this before removing any states from closure) - for (clositer_t i = done.begin(); i != done.end(); ++i) { - i->state->clos = NOCLOS; - } -} - -} // namespace re2c diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 6e841475..55e12675 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -18,8 +18,7 @@ #include "src/util/slab_allocator.h" -namespace re2c -{ +namespace re2c { // fwd struct opt_t; @@ -190,7 +189,6 @@ typedef determ_context_t ldetctx_t; template void tagged_epsilon_closure(ctx_t &ctx); template void find_state(ctx_t &ctx); -void closure_leftmost(ldetctx_t &); inline bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const {