From 8d1ed510d50b770793d1198022286993e76a09c4 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 1 Mar 2019 10:55:38 +0000 Subject: [PATCH] Deduplicated tag history definitions used in re2c and libre2c. --- re2c/lib/regex_impl.h | 88 +-------- re2c/lib/regexec.cc | 23 +-- re2c/lib/regexec_nfa_leftmost.cc | 10 +- re2c/lib/regexec_nfa_leftmost_trie.cc | 4 +- re2c/lib/regexec_nfa_posix.cc | 54 +++--- re2c/lib/regexec_nfa_posix_trie.cc | 24 +-- re2c/lib/test.cc | 7 +- re2c/src/debug/dump_dfa.cc | 29 ++- re2c/src/dfa/closure_posix.cc | 6 +- re2c/src/dfa/find_state.cc | 6 +- re2c/src/dfa/posix_precedence.cc | 12 +- re2c/src/dfa/tag_history.h | 170 +++++++++++++++--- re2c/src/dfa/tcmd.cc | 14 +- .../dfa_raw.i--posix-captures--dump-dfa-raw.c | 4 +- 14 files changed, 242 insertions(+), 209 deletions(-) diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index f5fb9c7a..2feab0bd 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -16,64 +16,13 @@ namespace libre2c { typedef std::vector tag_path_t; -const tag_info_t NOINFO = {0x3fffFFFF, 0}; - -static const uint32_t NONFIN = ~0u; -static const uint32_t USED = NONFIN - 1; - -struct history_t -{ - struct arc_t { - int32_t node; - int32_t prev; - int32_t next; - - inline arc_t(int32_t node, int32_t prev, int32_t next) - : node(node), prev(prev), next(next) {} - }; - - struct node_t { - tag_info_t info; - int32_t pred; - union { - int32_t last; - uint32_t step; - }; - union { - int32_t next; - uint32_t orig; - }; - uint32_t finidx; - - inline node_t(tag_info_t info, int32_t pred) - : info(info), pred(pred), last(-1), next(-1), finidx(NONFIN) {} - inline node_t(tag_info_t info, int32_t pred, uint32_t step, uint32_t orig) - : info(info), pred(pred), step(step), orig(orig), finidx(NONFIN) {} - }; - - static const int32_t ROOT; - - std::vector nodes; - std::vector arcs; - - explicit history_t(size_t nstates); - inline void init(); - inline node_t &node(int32_t i) { return nodes[static_cast(i)]; } - inline const node_t &node(int32_t i) const { return nodes[static_cast(i)]; } - inline arc_t &arc(int32_t i) { return arcs[static_cast(i)]; } - inline const arc_t &arc(int32_t i) const { return arcs[static_cast(i)]; } - inline int32_t push(int32_t i, uint32_t step, tag_info_t info, uint32_t orig); - inline int32_t push(tag_info_t info, int32_t idx); - FORBID_COPY(history_t); -}; - struct conf_t { nfa_state_t *state; uint32_t origin; int32_t thist; - inline conf_t(): state(NULL), origin(0), thist(history_t::ROOT) {} + inline conf_t(): state(NULL), origin(0), thist(HROOT) {} inline conf_t(nfa_state_t *s, uint32_t o, int32_t h) : state(s), origin(o), thist(h) {} }; @@ -114,7 +63,7 @@ struct simctx_t confset_t reach; confset_t state; - history_t hist; + tag_history_t hist; int32_t hidx; uint32_t step; @@ -157,39 +106,6 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string, size_t nmatc int regexec_nfa_leftmost(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); -void history_t::init() -{ - nodes.clear(); - arcs.clear(); - nodes.push_back(node_t(NOINFO, -1)); -} - -int32_t history_t::push(tag_info_t info, int32_t idx) -{ - const int32_t i = static_cast(nodes.size()); - if (idx != -1) { - node_t &n = node(idx); - const int32_t a = static_cast(arcs.size()); - arcs.push_back(arc_t(i, n.last, -1)); - if (n.next == -1) { - n.next = a; - } - else { - arc(n.last).next = a; - } - n.last = a; - } - nodes.push_back(node_t(info, idx)); - return i; -} - -int32_t history_t::push(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) -{ - const int32_t i = static_cast(nodes.size()); - nodes.push_back(node_t(info, idx, step, orig)); - return i; -} - bool ran_or_fin_t::operator()(const conf_t &c) { switch (c.state->type) { diff --git a/re2c/lib/regexec.cc b/re2c/lib/regexec.cc index 2fec52c0..c90e1c5a 100644 --- a/re2c/lib/regexec.cc +++ b/re2c/lib/regexec.cc @@ -29,8 +29,6 @@ int regexec(const regex_t *preg, const char *string, size_t nmatch, namespace re2c { namespace libre2c { -const int32_t history_t::ROOT = 0; - int finalize(const simctx_t &ctx, const char *string, size_t nmatch, regmatch_t pmatch[]) { @@ -47,14 +45,14 @@ int finalize(const simctx_t &ctx, const char *string, size_t nmatch, bool *done = ctx.done; memset(done, 0, ctx.nsub * sizeof(bool)); - for (int32_t i = ctx.hidx; todo > 0 && i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.node(i); + for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) { + const tag_history_t::node_t &n = ctx.hist.node(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; if (!fictive(tag) && t < nmatch * 2 && !done[t]) { done[t] = true; --todo; - const regoff_t off = n.info.neg ? -1 : static_cast(n.step); + const regoff_t off = n.info.neg ? -1 : static_cast(ctx.hist.node2(i).step); m = &pmatch[t / 2 + 1]; if (t % 2 == 0) { m->rm_so = off; @@ -75,8 +73,8 @@ simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags) , flags(flags) , reach() , state() - , hist(nfa->size) - , hidx(history_t::ROOT) + , hist() + , hidx(HROOT) , step(0) , rule(Rule::NONE) , cursor(NULL) @@ -149,7 +147,7 @@ void init(simctx_t &ctx, const char *string) ctx.reach.clear(); ctx.state.clear(); ctx.hist.init(); - ctx.hidx = history_t::ROOT; + ctx.hidx = HROOT; ctx.step = 0; ctx.rule = Rule::NONE; ctx.cursor = ctx.marker = string; @@ -162,14 +160,5 @@ void init(simctx_t &ctx, const char *string) DASSERT(ctx.gtop_heap.empty()); } -history_t::history_t(size_t /* nstates */) - : nodes() - , arcs() -{ - // nodes.reserve(nstates); - // arcs.reserve(nstates); - init(); -} - } // namespace libre2c } // namespace re2c diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index 3e24ce75..ede6ba9d 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -21,7 +21,7 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string init(ctx, string); // root state can be non-core, so we pass zero as origin to avoid checks - const conf_t c0(ctx.nfa->root, 0, history_t::ROOT); + const conf_t c0(ctx.nfa->root, 0, HROOT); ctx.reach.push_back(c0); closure_leftmost(ctx); @@ -77,7 +77,7 @@ void reach_on_symbol(simctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - conf_t c(s->ran.out, s->coreid, history_t::ROOT); + conf_t c(s->ran.out, s->coreid, HROOT); reach.push_back(c); update_offsets(ctx, *i); break; @@ -120,7 +120,7 @@ void closure_leftmost(simctx_t &ctx) break; case nfa_state_t::TAG: wl.push_back(conf_t(n->tag.out, o - , ctx.hist.push(h, ctx.step, n->tag.info, o))); + , ctx.hist.push(h, n->tag.info))); break; default: break; @@ -147,8 +147,8 @@ void update_offsets(simctx_t &ctx, const conf_t &c) memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); memset(done, 0, nsub * sizeof(bool)); - for (int32_t i = c.thist; i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.node(i); + for (int32_t i = c.thist; i != HROOT; ) { + const tag_history_t::node_t &n = ctx.hist.node(i); const size_t t = n.info.idx; if (!done[t]) { done[t] = true; diff --git a/re2c/lib/regexec_nfa_leftmost_trie.cc b/re2c/lib/regexec_nfa_leftmost_trie.cc index 7329fbd1..1ed3d605 100644 --- a/re2c/lib/regexec_nfa_leftmost_trie.cc +++ b/re2c/lib/regexec_nfa_leftmost_trie.cc @@ -20,7 +20,7 @@ int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string init(ctx, string); nfa_state_t *s0 = ctx.nfa->root; - const conf_t c0(s0, s0->coreid, history_t::ROOT); + const conf_t c0(s0, s0->coreid, HROOT); ctx.reach.push_back(c0); closure_leftmost(ctx); @@ -95,7 +95,7 @@ void closure_leftmost(simctx_t &ctx) break; case nfa_state_t::TAG: wl.push_back(conf_t(n->tag.out, o - , ctx.hist.push(h, ctx.step, n->tag.info, o))); + , ctx.hist.push2(h, ctx.step, n->tag.info, o))); break; case nfa_state_t::RAN: break; diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index 26f58107..a9fb3a97 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -54,7 +54,7 @@ int do_regexec(const regex_t *preg, const char *string init(ctx, string); // root state can be non-core, so we pass zero as origin to avoid checks - const conf_t c0(ctx.nfa->root, 0, history_t::ROOT); + const conf_t c0(ctx.nfa->root, 0, HROOT); ctx.reach.push_back(c0); closure_posix(ctx); for (;;) { @@ -102,7 +102,7 @@ void make_one_step(simctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - const conf_t c(s->ran.out, s->coreid, history_t::ROOT); + const conf_t c(s->ran.out, s->coreid, HROOT); reach.push_back(c); state[j++] = *i; update_offsets(ctx, *i); @@ -234,7 +234,7 @@ bool scan(simctx_t &ctx, nfa_state_t *q, bool all) case nfa_state_t::TAG: if (q->arcidx == 0) { any |= relax_gor1(ctx, conf_t(q->tag.out, o - , ctx.hist.push(q->tag.info, h))); + , ctx.hist.push1(h, q->tag.info))); ++q->arcidx; } break; @@ -310,7 +310,7 @@ void closure_posix(simctx_t &ctx) break; case nfa_state_t::TAG: relax_gtop(ctx, conf_t(q->tag.out, o - , ctx.hist.push(q->tag.info, h))); + , ctx.hist.push1(h, q->tag.info))); break; default: break; @@ -355,7 +355,7 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y } const std::vector &tags = ctx.nfa->tags; - history_t &hist = ctx.hist; + tag_history_t &hist = ctx.hist; const bool fork_frame = orig1 == orig2; if (fork_frame) { @@ -370,19 +370,19 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y int32_t i1 = idx1, i2 = idx2; for (; i1 != i2; ) { if (i1 > i2) { - const history_t::node_t &n = hist.node(i1); + const tag_history_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; } else { - const history_t::node_t &n = hist.node(i2); + const tag_history_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; } } - if (i1 != history_t::ROOT) { + if (i1 != HROOT) { DASSERT(fork_frame); const int32_t h = tags[hist.node(i1).info.idx].height; prec1 = std::min(prec1, h); @@ -449,8 +449,8 @@ void update_offsets(simctx_t &ctx, const conf_t &c) memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); memset(done, 0, nsub * sizeof(bool)); - for (int32_t i = c.thist; i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.node(i); + for (int32_t i = c.thist; i != HROOT; ) { + const tag_history_t::node_t &n = ctx.hist.node(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; regoff_t *off = o + t; @@ -471,7 +471,7 @@ void update_prectbl(simctx_t &ctx) std::vector stack = ctx.worklist; std::vector &level = ctx.histlevel; std::vector::reverse_iterator li, lj, lk, le; - history_t &hist = ctx.hist; + tag_history_t &hist = ctx.hist; const size_t ncores = ctx.nfa->ncores; int32_t *oldtbl = ctx.prectbl1, *newtbl = ctx.prectbl2; @@ -482,20 +482,20 @@ void update_prectbl(simctx_t &ctx) // array of boundaries in the sorted configuration array. uint32_t maxfin = 0; for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) { - history_t::node_t &n = hist.node(c->thist); - if (n.finidx >= USED) { - n.finidx = maxfin++; - fcount[n.finidx] = 0; + uint32_t &x = hist.node1(c->thist).finidx; + if (x >= USED) { + x = maxfin++; + fcount[x] = 0; // mark all nodes down to root as used (unless marked already) - for (int32_t i = n.pred; i >= history_t::ROOT; ) { - history_t::node_t &m = hist.node(i); - if (m.finidx <= USED) break; - m.finidx = USED; - i = m.pred; + for (int32_t i = hist.node(c->thist).pred; i >= HROOT; ) { + uint32_t &y = hist.node1(i).finidx; + if (y <= USED) break; + y = USED; + i = hist.node(i).pred; } } - ++fcount[n.finidx]; + ++fcount[x]; } fcount[maxfin] = 0; for (size_t i = 1; i <= maxfin; ++i) { @@ -503,7 +503,7 @@ void update_prectbl(simctx_t &ctx) } sortcores.resize(state.size()); for (rcconfiter_t c = state.rbegin(), e = state.rend(); c != e; ++c) { - sortcores[--fcount[hist.node(c->thist).finidx]] = &*c; + sortcores[--fcount[hist.node1(c->thist).finidx]] = &*c; } // Depth-first traversal of the history tree. During traversal we grow @@ -516,7 +516,7 @@ void update_prectbl(simctx_t &ctx) stack.push_back(0); while (!stack.empty()) { const int32_t n = stack.back(); - history_t::node_t &node = hist.node(n); + tag_history_t::node1_t &node = hist.node1(n); const uint32_t fidx = node.finidx; if (fidx == NONFIN) { @@ -527,14 +527,14 @@ void update_prectbl(simctx_t &ctx) if (node.next != -1) { // start or continue visiting subtrees rooted at this node - const history_t::arc_t &arc = hist.arc(node.next); + const tag_history_t::arc_t &arc = hist.arc(node.next); stack.push_back(arc.node); node.next = arc.next; continue; } // all subtrees visited, it's time to process this node - const int32_t h = n == 0 ? MAX_RHO : tags[node.info.idx].height; + const int32_t h = n == 0 ? MAX_RHO : tags[hist.node(n).info.idx].height; li = level.rbegin(); le = level.rend(); @@ -542,7 +542,7 @@ void update_prectbl(simctx_t &ctx) // this node has leaf configurations, add them to level for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) { const conf_t *c = sortcores[k]; - const histleaf_t l = {c->state->coreid, c->origin, history_t::ROOT, h}; + const histleaf_t l = {c->state->coreid, c->origin, HROOT, h}; level.push_back(l); } @@ -576,7 +576,7 @@ void update_prectbl(simctx_t &ctx) // their precedence has already been computed and must not be touched. for (int32_t a = node.last; a != -1; ) { - const history_t::arc_t &arc = hist.arc(a); + const tag_history_t::arc_t &arc = hist.arc(a); a = arc.prev; // for all the items of this subtree diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 6a0167a8..11b4dea1 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -48,8 +48,8 @@ static int32_t precedence_(simctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, // we *do* want this to be inlined static inline void relax(simctx_t &, const conf_t &); -static inline uint32_t get_step(const history_t &hist, int32_t idx); -static inline uint32_t get_orig(const history_t &hist, int32_t idx); +static inline uint32_t get_step(const tag_history_t &hist, int32_t idx); +static inline uint32_t get_orig(const tag_history_t &hist, int32_t idx); int regexec_nfa_posix_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -58,7 +58,7 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string init(ctx, string); nfa_state_t *s0 = ctx.nfa->root; - const conf_t c0(s0, s0->coreid, history_t::ROOT); + const conf_t c0(s0, s0->coreid, HROOT); ctx.reach.push_back(c0); closure_posix(ctx); for (;;) { @@ -151,7 +151,7 @@ void closure_posix(simctx_t &ctx) break; case nfa_state_t::TAG: relax(ctx, conf_t(q->tag.out, o - , ctx.hist.push(h, ctx.step, q->tag.info, o))); + , ctx.hist.push2(h, ctx.step, q->tag.info, o))); break; default: break; @@ -243,7 +243,7 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 } const std::vector &tags = ctx.nfa->tags; - history_t &hist = ctx.hist; + tag_history_t &hist = ctx.hist; int32_t prec = 0; prec1 = prec2 = MAX_RHO; @@ -260,14 +260,14 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 tag_info_t info1, info2; for (; i1 != i2 && (s1 >= s || s2 >= s);) { if (s1 >= s && (i1 > i2 || s2 < s)) { - const history_t::node_t &n = hist.node(i1); + const tag_history_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; s1 = get_step(hist, i1); } else { - const history_t::node_t &n = hist.node(i2); + const tag_history_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; @@ -281,7 +281,7 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 prec1 = std::min(prec1, p1); prec2 = std::min(prec2, p2); } - else if (i1 != history_t::ROOT) { + else if (i1 != HROOT) { const int32_t h = tags[hist.node(i1).info.idx].height; prec1 = std::min(prec1, h); prec2 = std::min(prec2, h); @@ -325,14 +325,14 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 return 0; } -uint32_t get_step(const history_t &hist, int32_t idx) +uint32_t get_step(const tag_history_t &hist, int32_t idx) { - return idx == history_t::ROOT ? 0 : hist.node(idx).step; + return idx == HROOT ? 0 : hist.node2(idx).step; } -uint32_t get_orig(const history_t &hist, int32_t idx) +uint32_t get_orig(const tag_history_t &hist, int32_t idx) { - return idx == history_t::ROOT ? 0 : hist.node(idx).orig; + return idx == HROOT ? 0 : hist.node2(idx).orig; } } // namespace libre2c diff --git a/re2c/lib/test.cc b/re2c/lib/test.cc index 6c1dd7c8..2b33b16b 100644 --- a/re2c/lib/test.cc +++ b/re2c/lib/test.cc @@ -510,7 +510,10 @@ static int test_all_posix(int flags) T4("(ab|a)(c|bcd)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); T4("(ab|a)(bcd|c)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); - if (!(flags & REG_SLOWPREC)) { + if (!(flags & REG_NFA)) { + T3("((a?){1,100})*", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0,50, 0,50, 49,50); + } + else if (!(flags & REG_SLOWPREC)) { T3("((a?){1,1000})*", "aaaa", 0,4, 0,4, 3,4); } @@ -950,7 +953,7 @@ int main() { int e = 0; -// e |= test_all_posix(0); + e |= test_all_posix(0); e |= test_all_posix(REG_NFA); e |= test_all_posix(REG_NFA | REG_GTOP); e |= test_all_posix(REG_NFA | REG_TRIE); diff --git a/re2c/src/debug/dump_dfa.cc b/re2c/src/debug/dump_dfa.cc index 1ddb9cbd..93da6a82 100644 --- a/re2c/src/debug/dump_dfa.cc +++ b/re2c/src/debug/dump_dfa.cc @@ -160,16 +160,17 @@ void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) return; } - dump_history(dfa, h, h.pred(i)); + const tag_history_t::node_t &n = h.node(i); - const Tag &t = dfa.tags[h.tag(i)]; - const tagver_t v = h.elem(i); + dump_history(dfa, h, n.pred); + + const Tag &t = dfa.tags[n.info.idx]; if (capture(t)) { fprintf(stderr, "%u", (uint32_t)t.ncap); } else if (!trailing(t)) { fprintf(stderr, "%s", t.name->c_str()); } - fprintf(stderr, v == TAGVER_BOTTOM ? "↓" : "↑"); + fprintf(stderr, n.info.neg ? "↓" : "↑"); fprintf(stderr, " "); } @@ -282,23 +283,19 @@ void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory, fprintf(stderr, "/"); const tagver_t *vers = tagvertbl[tvers]; - for (size_t i = 0; i < tagvertbl.ntags; ++i) { + for (size_t t = 0; t < tagvertbl.ntags; ++t) { - if (taghistory.last(ttran, i) == TAGVER_ZERO) { + if (taghistory.last(ttran, t) == TAGVER_ZERO) { continue; } - fprintf(stderr, "%d", abs(vers[i])); - for (hidx_t t = ttran; t != HROOT; t = taghistory.pred(t)) { - if (taghistory.tag(t) != i) { - continue; - } - else if (taghistory.elem(t) < TAGVER_ZERO) { - fprintf(stderr, "↓"); - } - else if (t > TAGVER_ZERO) { - fprintf(stderr, "↑"); + fprintf(stderr, "%d", abs(vers[t])); + for (hidx_t i = ttran; i != HROOT; ) { + const tag_history_t::node_t &n = taghistory.node(i); + if (n.info.idx == t) { + fprintf(stderr, n.info.neg ? "↓" : "↑"); } + i = n.pred; } fprintf(stderr, " "); } diff --git a/re2c/src/dfa/closure_posix.cc b/re2c/src/dfa/closure_posix.cc index 9059a74a..6c2d309e 100644 --- a/re2c/src/dfa/closure_posix.cc +++ b/re2c/src/dfa/closure_posix.cc @@ -36,6 +36,8 @@ void closure_posix(determ_context_t &ctx) { DRESET_CLSTATS(ctx); + ctx.dc_taghistory.detach(); + switch (ctx.dc_opts->posix_closure) { case POSIX_CLOSURE_GOR1: closure_posix_gor1(ctx); break; case POSIX_CLOSURE_GTOP: closure_posix_gtop(ctx); break; @@ -162,7 +164,7 @@ bool scan(determ_context_t &ctx, nfa_state_t *q, bool all) case nfa_state_t::TAG: if (q->arcidx == 0) { x.state = q->tag.out; - x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info); + x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info); any |= relax_gor1(ctx, x); ++q->arcidx; } @@ -260,7 +262,7 @@ void closure_posix_gtop(determ_context_t &ctx) break; case nfa_state_t::TAG: x.state = q->tag.out; - x.tlook = ctx.dc_taghistory.push(x.tlook, q->tag.info); + x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info); relax_gtop(ctx, x); break; default: diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index 98d07947..cd6791c8 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -404,8 +404,10 @@ void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx) // But this would complicate unwind procedure quite a bit, and the // cases when it makes any difference are rare. path.clear(); - for (; idx != HROOT; idx = hist.pred(idx)) { - path.push_back(hist.info(idx)); + for (; idx != HROOT; ) { + const tag_history_t::node_t &n = hist.node(idx); + path.push_back(n.info); + idx = n.pred; } } diff --git a/re2c/src/dfa/posix_precedence.cc b/re2c/src/dfa/posix_precedence.cc index d7a89e3d..e9403e64 100644 --- a/re2c/src/dfa/posix_precedence.cc +++ b/re2c/src/dfa/posix_precedence.cc @@ -38,20 +38,22 @@ int32_t precedence(determ_context_t &ctx, const clos_t &x, const clos_t &y int32_t i1 = idx1, i2 = idx2; for (; i1 != i2; ) { if (i1 > i2) { - info1 = hist.info(i1); + const tag_history_t::node_t &n = hist.node(i1); + info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); - i1 = hist.pred(i1); + i1 = n.pred; } else { - info2 = hist.info(i2); + const tag_history_t::node_t &n = hist.node(i2); + info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); - i2 = hist.pred(i2); + i2 = n.pred; } DINCCOUNT_CLLENGTH(ctx, 1); } if (i1 != HROOT) { DASSERT(fork_frame); - const int32_t h = tags[hist.info(i1).idx].height; + const int32_t h = tags[hist.node(i1).info.idx].height; prec1 = std::min(prec1, h); prec2 = std::min(prec2, h); } diff --git a/re2c/src/dfa/tag_history.h b/re2c/src/dfa/tag_history.h index 5a4342ff..e58e6003 100644 --- a/re2c/src/dfa/tag_history.h +++ b/re2c/src/dfa/tag_history.h @@ -14,45 +14,123 @@ namespace re2c typedef int32_t hidx_t; typedef int32_t prectable_t; -struct clos_t; - -static const hidx_t HROOT = -1; - typedef std::vector tag_path_t; +static const hidx_t HROOT = 0; +const tag_info_t NOINFO = {0x3fffFFFF, 0}; +static const uint32_t NONFIN = ~0u; +static const uint32_t USED = NONFIN - 1; + +// Different algorithms need to keep slightly different data in history. +// We store main data in one array, and auxilary data in separate arrays +// (this allows to avoid overhead in algorithms that don't need it). struct tag_history_t { - // the whole tree of tags found by the epsilon-closure - // (a bunch of separate subtrees for each tag with common root) struct node_t { - hidx_t pred; tag_info_t info; + hidx_t pred; + + inline node_t(tag_info_t info, hidx_t pred) + : info(info), pred(pred) {} + }; + + struct arc_t { + hidx_t node; + hidx_t prev; + hidx_t next; + + inline arc_t(hidx_t node, hidx_t prev, hidx_t next) + : node(node), prev(prev), next(next) {} + }; + + struct node1_t { + hidx_t last; + hidx_t next; + uint32_t finidx; + + inline node1_t(hidx_t last, hidx_t next) + : last(last), next(next), finidx(NONFIN) {} + }; + + struct node2_t { + uint32_t step; + uint32_t orig; + + inline node2_t(uint32_t step, uint32_t orig) + : step(step), orig(orig) {} }; + + // main history in the form of a backward-linked trie std::vector nodes; - inline tag_history_t(): nodes() {}; - inline const node_t &at(hidx_t i) const { return nodes[static_cast(i)]; } - inline tag_info_t info(hidx_t i) const { return at(i).info; } - inline hidx_t pred(hidx_t i) const { return at(i).pred; } - inline tagver_t elem(hidx_t i) const { return at(i).info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; } - inline size_t tag(hidx_t i) const { return at(i).info.idx; } - inline hidx_t push(hidx_t i, tag_info_t info); + // forward-linked history used by POSIX disambiguation + std::vector nodes1; + std::vector arcs; + + // auxilary data used by lazy POSIX disambiguation + std::vector nodes2; + + inline tag_history_t(); + inline void init(); + inline void detach(); + + inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } + inline node1_t &node1(hidx_t i) { return nodes1[static_cast(i)]; } + inline node2_t &node2(hidx_t i) { return nodes2[static_cast(i)]; } + inline arc_t &arc(hidx_t i) { return arcs[static_cast(i)]; } + inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } + inline const node1_t &node1(hidx_t i) const { return nodes1[static_cast(i)]; } + inline const node2_t &node2(hidx_t i) const { return nodes2[static_cast(i)]; } + inline const arc_t &arc(hidx_t i) const { return arcs[static_cast(i)]; } + + inline hidx_t push(hidx_t idx, tag_info_t info); + inline hidx_t push1(hidx_t idx, tag_info_t info); + inline hidx_t push2(hidx_t idx, uint32_t step, tag_info_t info, uint32_t orig); + inline tagver_t last(hidx_t i, size_t t) const; inline int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const; + FORBID_COPY(tag_history_t); }; -hidx_t tag_history_t::push(hidx_t idx, tag_info_t info) +tag_history_t::tag_history_t() + : nodes() + , nodes1() + , arcs() + , nodes2() { - node_t x = {idx, info}; - nodes.push_back(x); - return static_cast(nodes.size() - 1); + init(); +} + +void tag_history_t::init() +{ + nodes.clear(); + nodes1.clear(); + arcs.clear(); + nodes2.clear(); + + nodes.push_back(node_t(NOINFO, -1)); + nodes1.push_back(node1_t(-1, -1)); + nodes2.push_back(node2_t(0, 0)); +} + +void tag_history_t::detach() +{ + // don't delete existing tree, just detach it from root + // pointers to old tree are still valid, but traversals will ignore it + node1_t &n = node1(0); + n.last = n.next = -1; + n.finidx = NONFIN; } tagver_t tag_history_t::last(hidx_t i, size_t t) const { - for (; i != HROOT; i = pred(i)) { - if (tag(i) == t) return elem(i); + for (; i != HROOT; ) { + const node_t &n = node(i); + if (n.info.idx == t) { + return n.info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; + } + i = n.pred; } return TAGVER_ZERO; } @@ -64,18 +142,58 @@ int32_t tag_history_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const // compare in reverse, from tail to head: direction makes // no difference when comparing for exact coincidence for (;;) { - for (; x != HROOT && tag(x) != t; x = pred(x)); - for (; y != HROOT && tag(y) != t; y = pred(y)); + for (; x != HROOT && node(x).info.idx != t; x = node(x).pred); + for (; y != HROOT && node(y).info.idx != t; y = node(y).pred); + if (x == y) return 0; if (x == HROOT) return -1; if (y == HROOT) return 1; - if (elem(x) > elem(y)) return -1; - if (elem(x) < elem(y)) return 1; - x = pred(x); - y = pred(y); + + const node_t &nx = node(x), &ny = node(y); + + if (nx.info.neg > ny.info.neg) return -1; + if (nx.info.neg < ny.info.neg) return 1; + + x = nx.pred; + y = ny.pred; } } +int32_t tag_history_t::push(int32_t idx, tag_info_t info) +{ + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(info, idx)); + return i; +} + +int32_t tag_history_t::push1(int32_t idx, tag_info_t info) +{ + const int32_t i = static_cast(nodes.size()); + if (idx != -1) { + node1_t &n = node1(idx); + const int32_t a = static_cast(arcs.size()); + arcs.push_back(arc_t(i, n.last, -1)); + if (n.next == -1) { + n.next = a; + } + else { + arc(n.last).next = a; + } + n.last = a; + } + nodes.push_back(node_t(info, idx)); + nodes1.push_back(node1_t(-1, -1)); + return i; +} + +int32_t tag_history_t::push2(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) +{ + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(info, idx)); + nodes2.push_back(node2_t(step, orig)); + return i; +} + } // namespace re2c #endif // _RE2C_DFA_TAG_HISTORY_ diff --git a/re2c/src/dfa/tcmd.cc b/re2c/src/dfa/tcmd.cc index d18f9796..3746924a 100644 --- a/re2c/src/dfa/tcmd.cc +++ b/re2c/src/dfa/tcmd.cc @@ -161,8 +161,10 @@ tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag) { size_t hlen = 0; - for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { - if (history.tag(i) == tag) ++hlen; + for (hidx_t i = hidx; i != HROOT; ) { + const tag_history_t::node_t &n = history.node(i); + if (n.info.idx == tag) ++hlen; + i = n.pred; } const size_t size = sizeof(tcmd_t) + hlen * sizeof(tagver_t); @@ -171,10 +173,12 @@ tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, p->lhs = lhs; p->rhs = rhs; tagver_t *h = p->history; - for (hidx_t i = hidx; i != HROOT; i = history.pred(i)) { - if (history.tag(i) == tag) { - *h++ = history.elem(i); + for (hidx_t i = hidx; i != HROOT; ) { + const tag_history_t::node_t &n = history.node(i); + if (n.info.idx == tag) { + *h++ = n.info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; } + i = n.pred; } *h++ = TAGVER_ZERO; return p; diff --git a/re2c/test/debug/dfa_raw.i--posix-captures--dump-dfa-raw.c b/re2c/test/debug/dfa_raw.i--posix-captures--dump-dfa-raw.c index 06d12432..b952a29e 100644 --- a/re2c/test/debug/dfa_raw.i--posix-captures--dump-dfa-raw.c +++ b/re2c/test/debug/dfa_raw.i--posix-captures--dump-dfa-raw.c @@ -47,8 +47,8 @@ digraph DFA { r0 [shape=none label="(5 0 6 7)"] 0:0:e -> r0 [style=dotted label="/7↓ 6↓ 5↑ "] 1 [label=<
0 8 2 9 4 /3↑ 1↑
4 8 2 9 4 /3↑ 2↑
>] - 0:1:e -> 1:0:w [label="1/8 9↑ "] - 0:1:e -> 1:1:w [label="1/8 9↑ "] + 0:1:e -> 1:0:w [label="1/8↑ 9↑ "] + 0:1:e -> 1:1:w [label="1/8↑ 9↑ "] r1 [shape=none label="(5 0 6 7)"] 1:1:e -> r1 [style=dotted label="/6=9 5=8 7↑ "] i1 [label=<
0 8 2 10 11 /3↑ 1↑
4 8 2 10 11 /3↑ 2↑
>] -- 2.40.0