From 779747f70942ee3eda0b3183fcdf3b344657f166 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 4 Mar 2019 16:43:59 +0000 Subject: [PATCH] Reuse second closure buffer as a stack for leftmost greedy closure. --- re2c/src/dfa/closure_leftmost.cc | 21 ++++-------- re2c/src/dfa/closure_leftmost.h | 59 ++++++++++++++++++++++++++++++++ re2c/src/dfa/determinization.cc | 8 +++-- re2c/src/dfa/determinization.h | 1 - 4 files changed, 71 insertions(+), 18 deletions(-) create mode 100644 re2c/src/dfa/closure_leftmost.h diff --git a/re2c/src/dfa/closure_leftmost.cc b/re2c/src/dfa/closure_leftmost.cc index b9e81d7b..eb50becc 100644 --- a/re2c/src/dfa/closure_leftmost.cc +++ b/re2c/src/dfa/closure_leftmost.cc @@ -7,21 +7,14 @@ namespace re2c void closure_leftmost(ldetctx_t &ctx) { - const closure_t &init = ctx.reach; - closure_t &done = ctx.state; - std::stack &todo = ctx.stack_dfs; - - // enqueue all initial states + closure_t &done = ctx.state, &todo = ctx.reach; done.clear(); - for (rcclositer_t c = init.rbegin(); c != init.rend(); ++c) { - todo.push(*c); - } // DFS; linear complexity for (; !todo.empty(); ) { - const clos_t &x = todo.top(); + const clos_t &x = todo.back(); nfa_state_t *n = x.state; - todo.pop(); + todo.pop_back(); if (n->clos != NOCLOS) continue; @@ -30,14 +23,14 @@ void closure_leftmost(ldetctx_t &ctx) switch (n->type) { case nfa_state_t::NIL: - todo.push(clos_t(x, n->nil.out)); + todo.push_back(clos_t(x, n->nil.out)); break; case nfa_state_t::ALT: - todo.push(clos_t(x, n->alt.out2)); - todo.push(clos_t(x, n->alt.out1)); + todo.push_back(clos_t(x, n->alt.out2)); + todo.push_back(clos_t(x, n->alt.out1)); break; case nfa_state_t::TAG: - todo.push(clos_t(x, n->tag.out, ctx.history.link(ctx, x))); + todo.push_back(clos_t(x, n->tag.out, ctx.history.link(ctx, x))); break; default: break; diff --git a/re2c/src/dfa/closure_leftmost.h b/re2c/src/dfa/closure_leftmost.h new file mode 100644 index 00000000..90d8acd7 --- /dev/null +++ b/re2c/src/dfa/closure_leftmost.h @@ -0,0 +1,59 @@ +#ifndef _RE2C_DFA_CLOSURE_LEFTMOST_ +#define _RE2C_DFA_CLOSURE_LEFTMOST_ + +#include "src/dfa/determinization.h" +#include "src/nfa/nfa.h" + + +namespace re2c { + +template static void closure_leftmost_dfs(ctx_t &ctx); + +inline void closure_leftmost(ldetctx_t &ctx) +{ + closure_leftmost_dfs(ctx); + + // cleanup + for (clositer_t i = ctx.state.begin(); i != ctx.state.end(); ++i) { + i->state->clos = NOCLOS; + } +} + +template +void closure_leftmost_dfs(ctx_t &ctx) +{ + typename ctx_t::confset_t &state = ctx.state, &stack = ctx.reach; + state.clear(); + + // DFS; linear complexity + for (; !stack.empty(); ) { + typedef typename ctx_t::conf_t conf_t; + const conf_t x = stack.back(); + stack.pop_back(); + nfa_state_t *n = x.state; + + if (n->clos != NOCLOS) continue; + + n->clos = static_cast(state.size()); + state.push_back(x); + + switch (n->type) { + case nfa_state_t::NIL: + stack.push_back(conf_t(x, n->nil.out)); + break; + case nfa_state_t::ALT: + stack.push_back(conf_t(x, n->alt.out2)); + stack.push_back(conf_t(x, n->alt.out1)); + break; + case nfa_state_t::TAG: + stack.push_back(conf_t(x, n->tag.out, ctx.history.link(ctx, x))); + break; + default: + break; + } + } +} + +} // namespace re2c + +#endif // _RE2C_DFA_CLOSURE_LEFTMOST_ diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index a141a38e..e1a7a8c8 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -109,7 +109,7 @@ template void reach_on_symbol(ctx_t &ctx, uint32_t sym) { ctx.dc_symbol = sym; - const uint32_t symbol = ctx.dfa.charset[ctx.dc_symbol]; + const uint32_t symbol = ctx.dfa.charset[sym]; const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin]; ctx.oldprectbl = kernel->prectbl; @@ -118,7 +118,10 @@ void reach_on_symbol(ctx_t &ctx, uint32_t sym) closure_t &reach = ctx.reach; reach.clear(); - for (uint32_t i = 0; i < kernel->size; ++i) { + // Add configurations in reverse order: leftmost greedy closure uses + // the resulting array as stack, and POSIX closure doesn't care (GOR1 + // pre-sorts configurations, and GTOP uses priority queue). + for (uint32_t i = static_cast(kernel->size); i --> 0; ) { nfa_state_t *s = transition(kernel->state[i], symbol); if (s) { const clos_t c(s, i, kernel->tvers[i], kernel->thist[i], HROOT); @@ -256,7 +259,6 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg , dc_tagcount() , reach() , state() - , stack_dfs() , gor1_topsort() , gor1_linear() , gtop_buffer() diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 45a20949..6e841475 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -161,7 +161,6 @@ struct determ_context_t // tagged epsilon-closure confset_t reach; confset_t state; - std::stack stack_dfs; // stack used for DFS in leftmost greedy closure std::vector gor1_topsort; // stack used in GOR1 (POSIX closure) std::vector gor1_linear; // stack used in GOR1 (POSIX closure) std::vector gtop_buffer; // buffer used for heap in GTOP (POSIX closure) -- 2.40.0