From: Ulya Trofimovich Date: Mon, 4 Mar 2019 11:15:24 +0000 (+0000) Subject: Parameterized determinization/simulation context over history type. X-Git-Tag: 1.2~129 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2c2ed262774261a35445eb74dd0ae6cea7d13383;p=re2c Parameterized determinization/simulation context over history type. --- diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index d29fdb77..4e77efdc 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -50,14 +50,6 @@ typedef confset_t::iterator confiter_t; typedef confset_t::const_iterator cconfiter_t; typedef confset_t::const_reverse_iterator rcconfiter_t; -enum sema_t {POSIX, LEFTMOST}; -enum eval_t {STRICT, LAZY}; - -template struct history_type_t; -template<> struct history_type_t {typedef tag_history_t type;}; -template<> struct history_type_t {typedef tag_history_t type;}; -template struct history_type_t {typedef tag_history_t type;}; - template struct simctx_t { @@ -238,14 +230,14 @@ int finalize(const simctx_t &ctx, const char *string, size_t nmatch, memset(done, 0, ctx.nsub * sizeof(bool)); for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) { - const tag_history_t::node_t &n = ctx.history.node(i); + const typename simctx_t::history_t::node_t &n = ctx.history.node(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; if (!fictive(tag) && t < nmatch * 2 && !done[t]) { done[t] = true; --todo; const regoff_t off = n.info.neg ? -1 - : static_cast(ctx.history.node2(i).step); + : static_cast(n.step); m = &pmatch[t / 2 + 1]; if (t % 2 == 0) { m->rm_so = off; diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index 1fa8b0b2..f1bf6ad8 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -148,7 +148,7 @@ void update_offsets(lctx_t &ctx, const conf_t &c) memset(done, 0, nsub * sizeof(bool)); for (int32_t i = c.thist; i != HROOT; ) { - const tag_history_t::node_t &n = ctx.history.node(i); + const lctx_t::history_t::node_t &n = ctx.history.node(i); const size_t t = n.info.idx; if (!done[t]) { done[t] = true; diff --git a/re2c/lib/regexec_nfa_leftmost_trie.cc b/re2c/lib/regexec_nfa_leftmost_trie.cc index 30bf605a..d6a86158 100644 --- a/re2c/lib/regexec_nfa_leftmost_trie.cc +++ b/re2c/lib/regexec_nfa_leftmost_trie.cc @@ -95,7 +95,7 @@ void closure_leftmost(lzctx_t &ctx) break; case nfa_state_t::TAG: wl.push_back(conf_t(n->tag.out, o - , ctx.history.push2(h, ctx.step, n->tag.info, o))); + , ctx.history.push(h, ctx.step, n->tag.info, o))); break; case nfa_state_t::RAN: break; diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index 01b71b47..7066a516 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -156,7 +156,7 @@ void update_offsets(pctx_t &ctx, const conf_t &c, uint32_t id) memset(done, 0, nsub * sizeof(bool)); for (int32_t i = c.thist; i != HROOT; ) { - const tag_history_t::node_t &n = ctx.history.node(i); + const phistory_t::node_t &n = ctx.history.node(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; regoff_t *off = o + t; diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 6621250e..ef49be2e 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -7,6 +7,7 @@ #include "src/options/opt.h" #include "src/debug/debug.h" #include "src/dfa/determinization.h" +#include "src/dfa/posix_precedence.h" #include "src/nfa/nfa.h" @@ -48,8 +49,8 @@ static int32_t precedence_(pzctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, // we *do* want this to be inlined static inline void relax(pzctx_t &, const conf_t &); -static inline uint32_t get_step(const tag_history_t &hist, int32_t idx); -static inline uint32_t get_orig(const tag_history_t &hist, int32_t idx); +static inline uint32_t get_step(const zhistory_t &hist, int32_t idx); +static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx); int regexec_nfa_posix_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -151,7 +152,7 @@ void closure_posix(pzctx_t &ctx) break; case nfa_state_t::TAG: relax(ctx, conf_t(q->tag.out, o - , ctx.history.push2(h, ctx.step, q->tag.info, o))); + , ctx.history.push(h, ctx.step, q->tag.info, o))); break; default: break; @@ -243,7 +244,7 @@ int32_t precedence_(pzctx_t &ctx, int32_t idx1, int32_t idx2 } const std::vector &tags = ctx.nfa.tags; - tag_history_t &hist = ctx.history; + zhistory_t &hist = ctx.history; int32_t prec = 0; prec1 = prec2 = MAX_RHO; @@ -260,14 +261,14 @@ int32_t precedence_(pzctx_t &ctx, int32_t idx1, int32_t idx2 tag_info_t info1, info2; for (; i1 != i2 && (s1 >= s || s2 >= s);) { if (s1 >= s && (i1 > i2 || s2 < s)) { - const tag_history_t::node_t &n = hist.node(i1); + const zhistory_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; s1 = get_step(hist, i1); } else { - const tag_history_t::node_t &n = hist.node(i2); + const zhistory_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; @@ -325,14 +326,14 @@ int32_t precedence_(pzctx_t &ctx, int32_t idx1, int32_t idx2 return 0; } -uint32_t get_step(const tag_history_t &hist, int32_t idx) +uint32_t get_step(const zhistory_t &hist, int32_t idx) { - return idx == HROOT ? 0 : hist.node2(idx).step; + return idx == HROOT ? 0 : hist.node(idx).step; } -uint32_t get_orig(const tag_history_t &hist, int32_t idx) +uint32_t get_orig(const zhistory_t &hist, int32_t idx) { - return idx == HROOT ? 0 : hist.node2(idx).orig; + return idx == HROOT ? 0 : hist.node(idx).orig; } } // namespace libre2c diff --git a/re2c/src/debug/debug.h b/re2c/src/debug/debug.h index 44f0688e..d9fea7f2 100644 --- a/re2c/src/debug/debug.h +++ b/re2c/src/debug/debug.h @@ -37,7 +37,6 @@ namespace re2c { struct DFA; struct cfg_t; -struct determ_context_t; struct dfa_t; struct nfa_t; struct opt_t; @@ -57,7 +56,7 @@ struct dump_dfa_t explicit dump_dfa_t(const opt_t *); ~dump_dfa_t(); - void state(const determ_context_t &, bool); + template void state(const ctx_t &, bool); }; #define DASSERT(x) assert(x) @@ -80,9 +79,9 @@ void dump_dfa(const dfa_t &); void dump_adfa(const DFA &); void dump_cfg(const cfg_t &, const bool *); void dump_interf(const cfg_t &, const bool *); -void dump_clstats(const determ_context_t &); void dump_tcmd(const tcmd_t *); -void reset_clstats(determ_context_t &); +template void dump_clstats(const ctx_t &); +template void reset_clstats(ctx_t &); } // namespace re2c diff --git a/re2c/src/debug/dump_dfa.cc b/re2c/src/debug/dump_dfa.cc index 0eccfa07..1708573a 100644 --- a/re2c/src/debug/dump_dfa.cc +++ b/re2c/src/debug/dump_dfa.cc @@ -3,8 +3,6 @@ #include #include #include -#include -#include #include "src/options/opt.h" #include "src/debug/debug.h" @@ -20,11 +18,18 @@ namespace re2c { -static void dump_history(const dfa_t &, const tag_history_t &, hidx_t); +template void dump_history(const dfa_t &, const typename ctx_t::history_t &, hidx_t); +template void dump_tags(const tagver_table_t &, const typename ctx_t::history_t &, hidx_t, uint32_t); static void dump_tcmd_or_tcid(tcmd_t *const *, const tcid_t *, size_t, const tcpool_t &); static const char *tagname(const Tag &); -static void dump_tags(const tagver_table_t &, const tag_history_t &, hidx_t, uint32_t); +// explicit specialization for context types +template void dump_dfa_t::state(const pdetctx_t &ctx, bool isnew); +template void dump_dfa_t::state(const ldetctx_t &ctx, bool isnew); +template void dump_clstats(const pdetctx_t &); +template void dump_clstats(const ldetctx_t &); +template void reset_clstats(pdetctx_t &); +template void reset_clstats(ldetctx_t &); dump_dfa_t::dump_dfa_t(const opt_t *opts) : debug(opts->dump_dfa_raw) @@ -38,7 +43,6 @@ dump_dfa_t::dump_dfa_t(const opt_t *opts) " edge[arrowhead=vee fontname=Courier]\n\n"); } - dump_dfa_t::~dump_dfa_t() { if (!debug) return; @@ -46,8 +50,8 @@ dump_dfa_t::~dump_dfa_t() fprintf(stderr, "}\n"); } - -void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) +template +void dump_dfa_t::state(const ctx_t &ctx, bool isnew) { if (!debug) return; @@ -58,7 +62,7 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) const uint32_t symbol = ctx.dc_symbol; const dfa_t &dfa = ctx.dfa; const tagver_table_t &tvtbl = ctx.dc_tagvertbl; - const tag_history_t &thist = ctx.history; + const typename ctx_t::history_t &thist = ctx.history; uint32_t i; if (target == dfa_t::NIL) return; @@ -86,7 +90,7 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) } if (c->thist != HROOT) { - dump_history(dfa, thist, c->thist); + dump_history(dfa, thist, c->thist); } } @@ -101,7 +105,7 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) uint32_t i = 0; for (c = b; c != e; ++c, ++i) { fprintf(stderr, " void -> 0:%u:w [style=dotted label=\"", i); - dump_tags(tvtbl, thist, c->ttran, c->tvers); + dump_tags(tvtbl, thist, c->ttran, c->tvers); fprintf(stderr, "\"]\n"); } } @@ -123,7 +127,7 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) fprintf(stderr, " %u:%u:e -> %s%u:%u:w [label=\"%u", origin, c->origin, prefix, state, i, symbol); - dump_tags(tvtbl, thist, c->ttran, c->tvers); + dump_tags(tvtbl, thist, c->ttran, c->tvers); fprintf(stderr, "\"]\n"); } } @@ -152,17 +156,18 @@ void dump_dfa_t::state(const determ_context_t &ctx, bool isnew) } } - -void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) +template +void dump_history(const dfa_t &dfa + , const typename ctx_t::history_t &h, hidx_t i) { if (i == HROOT) { fprintf(stderr, " /"); return; } - const tag_history_t::node_t &n = h.node(i); + const typename ctx_t::history_t::node_t &n = h.node(i); - dump_history(dfa, h, n.pred); + dump_history(dfa, h, n.pred); const Tag &t = dfa.tags[n.info.idx]; if (capture(t)) { @@ -174,7 +179,6 @@ void dump_history(const dfa_t &dfa, const tag_history_t &h, hidx_t i) fprintf(stderr, " "); } - void dump_dfa(const dfa_t &dfa) { const size_t @@ -238,7 +242,6 @@ void dump_dfa(const dfa_t &dfa) fprintf(stderr, "}\n"); } - void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, size_t sym, const tcpool_t &tcpool) { @@ -246,7 +249,6 @@ void dump_tcmd_or_tcid(tcmd_t *const *tcmd, const tcid_t *tcid, dump_tcmd(cmd); } - void dump_tcmd(const tcmd_t *p) { if (!p) return; @@ -269,15 +271,15 @@ void dump_tcmd(const tcmd_t *p) } } - const char *tagname(const Tag &t) { return t.name ? t.name->c_str() : ""; } - -void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory, - hidx_t ttran, uint32_t tvers) +template +void dump_tags(const tagver_table_t &tagvertbl + , const typename ctx_t::history_t &taghistory + , hidx_t ttran, uint32_t tvers) { if (ttran == HROOT) return; @@ -285,13 +287,13 @@ void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory, const tagver_t *vers = tagvertbl[tvers]; for (size_t t = 0; t < tagvertbl.ntags; ++t) { - if (taghistory.last(ttran, t) == TAGVER_ZERO) { + if (last(taghistory, ttran, t) == TAGVER_ZERO) { continue; } fprintf(stderr, "%d", abs(vers[t])); for (hidx_t i = ttran; i != HROOT; ) { - const tag_history_t::node_t &n = taghistory.node(i); + const typename ctx_t::history_t::node_t &n = taghistory.node(i); if (n.info.idx == t) { fprintf(stderr, n.info.neg ? "↓" : "↑"); } @@ -301,9 +303,8 @@ void dump_tags(const tagver_table_t &tagvertbl, const tag_history_t &taghistory, } } - - -void reset_clstats(determ_context_t &ctx) +template +void reset_clstats(ctx_t &ctx) { closure_stats_t &cs = ctx.dc_clstats; cs.nscans = 0; @@ -311,7 +312,8 @@ void reset_clstats(determ_context_t &ctx) cs.length = 0; } -void dump_clstats(const determ_context_t &ctx) +template +void dump_clstats(const ctx_t &ctx) { const closure_stats_t &cs = ctx.dc_clstats; if (ctx.dc_opts->dump_closure_stats) { diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index e6058246..24a11b1e 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -37,7 +37,6 @@ namespace re2c * Both disambiguation policies forbid epsilon-cycles with negative weight. */ - /* note [at most one final item per closure] * * By construction NFA has exactly one final state per rule. Thus closure @@ -56,7 +55,6 @@ namespace re2c * with the highest priority (see note [closure items are sorted by rule]). */ - /* note [the difference between TDFA(0) and TDFA(1)] * * TDFA(0) performs epsilon-closure after transition on symbol, @@ -76,37 +74,46 @@ namespace re2c * Thus in general TDFA(1) raises less conflicts than TDFA(0). */ - +template static inline void closure(ctx_t &ctx); +template static void generate_versions(ctx_t &); static void prune(closure_t &, std::valarray &); static void lower_lookahead_to_transition(closure_t &); -static void generate_versions(determ_context_t &); static bool cmpby_rule_state(const clos_t &, const clos_t &); +// explicit specialization for context types +template void tagged_epsilon_closure(pdetctx_t &ctx); +template void tagged_epsilon_closure(ldetctx_t &ctx); -void tagged_epsilon_closure(determ_context_t &ctx) +template +void tagged_epsilon_closure(ctx_t &ctx) { - closure_t &closure = ctx.state; - // build tagged epsilon-closure of the given set of NFA states - if (ctx.dc_opts->posix_semantics) { - closure_posix(ctx); - prune(closure, ctx.nfa.rules); - std::sort(closure.begin(), closure.end(), cmpby_rule_state); - compute_prectable(ctx); - } else { - closure_leftmost(ctx); - prune(closure, ctx.nfa.rules); - } + closure(ctx); // see note [the difference between TDFA(0) and TDFA(1)] if (!ctx.dc_opts->lookahead) { - lower_lookahead_to_transition(closure); + lower_lookahead_to_transition(ctx.state); } // merge tags from different rules, find nondeterministic tags generate_versions(ctx); } +template<> +inline void closure(pdetctx_t &ctx) +{ + closure_posix(ctx); + prune(ctx.state, ctx.nfa.rules); + std::sort(ctx.state.begin(), ctx.state.end(), cmpby_rule_state); + compute_prectable(ctx); +} + +template<> +inline void closure(ldetctx_t &ctx) +{ + closure_leftmost(ctx); + prune(ctx.state, ctx.nfa.rules); +} bool cmpby_rule_state(const clos_t &x, const clos_t &y) { @@ -120,7 +127,6 @@ bool cmpby_rule_state(const clos_t &x, const clos_t &y) return false; } - void prune(closure_t &closure, std::valarray &rules) { clositer_t b = closure.begin(), e = closure.end(), i, j; @@ -144,7 +150,6 @@ void prune(closure_t &closure, std::valarray &rules) closure.resize(n); } - void lower_lookahead_to_transition(closure_t &closure) { for (clositer_t c = closure.begin(); c != closure.end(); ++c) { @@ -153,8 +158,8 @@ void lower_lookahead_to_transition(closure_t &closure) } } - -void generate_versions(determ_context_t &ctx) +template +void generate_versions(ctx_t &ctx) { dfa_t &dfa = ctx.dfa; const std::vector &tags = dfa.tags; @@ -163,11 +168,11 @@ void generate_versions(determ_context_t &ctx) tagver_table_t &tvtbl = ctx.dc_tagvertbl; tagver_t *vers = tvtbl.buffer; closure_t &clos = ctx.state; - tag_history_t &thist = ctx.history; - newvers_t &newvers = ctx.dc_newvers; + typename ctx_t::history_t &thist = ctx.history; + typename ctx_t::newvers_t &newvers = ctx.dc_newvers; clositer_t b = clos.begin(), e = clos.end(), c; - newvers_t newacts(newver_cmp_t(thist, ctx.dc_hc_caches)); + typename ctx_t::newvers_t newacts(newver_cmp_t(thist, ctx.dc_hc_caches)); tcmd_t *cmd = NULL; // for each tag, if there is at least one tagged transition, @@ -182,7 +187,7 @@ void generate_versions(determ_context_t &ctx) for (size_t t = 0; t < ntag; ++t) { const Tag &tag = tags[t]; - const tagver_t h0 = thist.last(h, t); + const tagver_t h0 = last(thist, h, t); if (h0 == TAGVER_ZERO) continue; const tagver_t v = history(tag) ? vs[t] : TAGVER_ZERO; @@ -192,26 +197,26 @@ void generate_versions(determ_context_t &ctx) m = newvers.insert(std::make_pair(x, n)).first->second; if (n == m) ++maxver; - if (!fixed(tag) && (history(tag) || thist.last(l, t) == TAGVER_ZERO)) { + if (!fixed(tag) && (history(tag) || last(thist, l, t) == TAGVER_ZERO)) { newacts.insert(std::make_pair(x, m)); } } } // actions - for (newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) { + for (typename ctx_t::newvers_t::iterator i = newacts.begin(); i != newacts.end(); ++i) { const tagver_t m = i->second, v = i->first.base; const hidx_t h = i->first.history; const size_t t = i->first.tag; if (history(tags[t])) { cmd = dfa.tcpool.make_add(cmd, abs(m), abs(v), thist, h, t); } else { - cmd = dfa.tcpool.make_set(cmd, abs(m), thist.last(h, t)); + cmd = dfa.tcpool.make_set(cmd, abs(m), last(thist, h, t)); } } // mark tags with history - for (newvers_t::iterator j = newvers.begin(); j != newvers.end(); ++j) { + for (typename ctx_t::newvers_t::iterator j = newvers.begin(); j != newvers.end(); ++j) { if (history(tags[j->first.tag])) { dfa.mtagvers.insert(abs(j->second)); } @@ -226,7 +231,7 @@ void generate_versions(determ_context_t &ctx) for (size_t t = 0; t < ntag; ++t) { const tagver_t v0 = vs[t], - h0 = thist.last(h, t), + h0 = last(thist, h, t), v = history(tags[t]) ? v0 : TAGVER_ZERO; if (h0 == TAGVER_ZERO) { vers[t] = v0; @@ -241,4 +246,38 @@ void generate_versions(determ_context_t &ctx) ctx.dc_actions = cmd; } +template +bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const +{ + if (x.tag < y.tag) return true; + if (x.tag > y.tag) return false; + + if (x.base < y.base) return true; + if (x.base > y.base) return false; + + hidx_t xh = x.history, yh = y.history; + if (xh == yh) return false; + + hc_cache_t &cache = caches[x.tag]; + int32_t cmp; + + bool invert = xh > yh; + if (invert) std::swap(xh, yh); + + uint64_t k = static_cast(xh); + k = (k << 32) | static_cast(yh); + + hc_cache_t::const_iterator i = cache.find(k); + if (i != cache.end()) { + cmp = i->second; + } + else { + cmp = compare_reversed(history, xh, yh, x.tag); + cache.insert(std::make_pair(k, cmp)); + } + + if (invert) cmp = -cmp; + return cmp < 0; +} + } // namespace re2c diff --git a/re2c/src/dfa/closure_leftmost.cc b/re2c/src/dfa/closure_leftmost.cc index 7b1ef1d9..64ef8e68 100644 --- a/re2c/src/dfa/closure_leftmost.cc +++ b/re2c/src/dfa/closure_leftmost.cc @@ -5,7 +5,7 @@ namespace re2c { -void closure_leftmost(determ_context_t &ctx) +void closure_leftmost(ldetctx_t &ctx) { const closure_t &init = ctx.reach; closure_t &done = ctx.state; diff --git a/re2c/src/dfa/closure_posix.h b/re2c/src/dfa/closure_posix.h index 9e59ef76..c694246b 100644 --- a/re2c/src/dfa/closure_posix.h +++ b/re2c/src/dfa/closure_posix.h @@ -27,7 +27,7 @@ template static inline bool scan(ctx_t &ctx, nfa_state_t *q, boo template static inline bool relax_gor1(ctx_t &, const typename ctx_t::conf_t &); template static inline void relax_gtop(ctx_t &, const typename ctx_t::conf_t &); -inline void closure_posix(determ_context_t &ctx) +inline void closure_posix(pdetctx_t &ctx) { DRESET_CLSTATS(ctx); @@ -167,7 +167,7 @@ bool scan(ctx_t &ctx, nfa_state_t *q, bool all) case nfa_state_t::TAG: if (q->arcidx == 0) { any |= relax_gor1(ctx, conf_t(x, q->tag.out - , ctx.history.push1(x.thist, q->tag.info))); + , ctx.history.push(x.thist, q->tag.info))); ++q->arcidx; } break; @@ -266,7 +266,7 @@ void closure_posix_gtop(ctx_t &ctx) break; case nfa_state_t::TAG: relax_gtop(ctx, conf_t(x, q->tag.out - , ctx.history.push1(x.thist, q->tag.info))); + , ctx.history.push(x.thist, q->tag.info))); break; default: break; diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 5858d391..a141a38e 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -22,16 +22,15 @@ namespace re2c { -static void clear_caches(determ_context_t &ctx); -static void reach_on_symbol(determ_context_t &ctx, uint32_t sym); +template static void determinization(ctx_t &ctx); +template static void clear_caches(ctx_t &ctx); +template static void reach_on_symbol(ctx_t &ctx, uint32_t sym); +template static uint32_t init_tag_versions(ctx_t &ctx); +template static void warn_nondeterministic_tags(const ctx_t &ctx); static nfa_state_t *transition(nfa_state_t *, uint32_t); -static uint32_t init_tag_versions(determ_context_t &); -static void warn_nondeterministic_tags(const determ_context_t &); - const uint32_t dfa_t::NIL = ~0u; - dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond , Msg &msg) : states() @@ -46,12 +45,34 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond , tcmd0(NULL) , tcid0(TCID0) { - determ_context_t ctx(opts, msg, cond, nfa, *this); + if (opts->posix_semantics) { + pdetctx_t ctx(opts, msg, cond, nfa, *this); + determinization(ctx); + } + else { + ldetctx_t ctx(opts, msg, cond, nfa, *this); + determinization(ctx); + } +} + +dfa_t::~dfa_t() +{ + std::vector::iterator + i = states.begin(), + e = states.end(); + for (; i != e; ++i) + { + delete *i; + } +} +template +void determinization(ctx_t &ctx) +{ const uint32_t INITIAL_TAGS = init_tag_versions(ctx); // initial state - const clos_t c0(nfa.root, 0, INITIAL_TAGS, HROOT, HROOT); + const clos_t c0(ctx.nfa.root, 0, INITIAL_TAGS, HROOT, HROOT); ctx.reach.push_back(c0); tagged_epsilon_closure(ctx); find_state(ctx); @@ -63,7 +84,7 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond ctx.dc_origin = i; clear_caches(ctx); - for (uint32_t c = 0; c < nchars; ++c) { + for (uint32_t c = 0; c < ctx.dfa.nchars; ++c) { reach_on_symbol(ctx, c); tagged_epsilon_closure(ctx); find_state(ctx); @@ -73,8 +94,8 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond warn_nondeterministic_tags(ctx); } - -void clear_caches(determ_context_t &ctx) +template +void clear_caches(ctx_t &ctx) { ctx.dc_newvers.clear(); @@ -84,8 +105,8 @@ void clear_caches(determ_context_t &ctx) } } - -void reach_on_symbol(determ_context_t &ctx, uint32_t sym) +template +void reach_on_symbol(ctx_t &ctx, uint32_t sym) { ctx.dc_symbol = sym; const uint32_t symbol = ctx.dfa.charset[ctx.dc_symbol]; @@ -106,7 +127,6 @@ void reach_on_symbol(determ_context_t &ctx, uint32_t sym) } } - nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) { if (state->type != nfa_state_t::RAN) { @@ -120,8 +140,8 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) return NULL; } - -uint32_t init_tag_versions(determ_context_t &ctx) +template +uint32_t init_tag_versions(ctx_t &ctx) { dfa_t &dfa = ctx.dfa; const size_t ntags = dfa.tags.size(); @@ -156,12 +176,12 @@ uint32_t init_tag_versions(determ_context_t &ctx) return INITIAL_TAGS; } - // For each tag, find maximal number of parallel versions of this tag // used in each kernel (degree of non-determinism) and warn about tags with // maximum degree two or more. // WARNING: this function assumes that kernel items are grouped by rule -void warn_nondeterministic_tags(const determ_context_t &ctx) +template +void warn_nondeterministic_tags(const ctx_t &ctx) { if (ctx.dc_opts->posix_syntax) return; @@ -211,8 +231,8 @@ void warn_nondeterministic_tags(const determ_context_t &ctx) } } - -determ_context_t::determ_context_t(const opt_t *opts, Msg &msg +template +determ_context_t::determ_context_t(const opt_t *opts, Msg &msg , const std::string &condname, const nfa_t &nfa, dfa_t &dfa) : dc_opts(opts) , dc_msg(msg) @@ -229,7 +249,7 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg , dc_kernels() , dc_buffers(dc_allocator) , dc_hc_caches() - , dc_newvers(newver_cmp_t(history, dc_hc_caches)) + , dc_newvers(newver_cmp_t::history_t>(history, dc_hc_caches)) , dc_path1() , dc_path2() , dc_path3() @@ -262,7 +282,7 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg dc_path3.reserve(ntags); dc_tagcount.resize(ntags); - if (opts->posix_semantics) { + if (SEMA == POSIX) { newprectbl = new prectable_t[ncores * ncores]; histlevel.reserve(ncores); sortcores.reserve(ncores); @@ -279,56 +299,12 @@ determ_context_t::determ_context_t(const opt_t *opts, Msg &msg } } - -determ_context_t::~determ_context_t() -{ - delete[] newprectbl; -} - - -dfa_t::~dfa_t() -{ - std::vector::iterator - i = states.begin(), - e = states.end(); - for (; i != e; ++i) - { - delete *i; - } -} - - -bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const +template +determ_context_t::~determ_context_t() { - if (x.tag < y.tag) return true; - if (x.tag > y.tag) return false; - - if (x.base < y.base) return true; - if (x.base > y.base) return false; - - hidx_t xh = x.history, yh = y.history; - if (xh == yh) return false; - - hc_cache_t &cache = caches[x.tag]; - int32_t cmp; - - bool invert = xh > yh; - if (invert) std::swap(xh, yh); - - uint64_t k = static_cast(xh); - k = (k << 32) | static_cast(yh); - - hc_cache_t::const_iterator i = cache.find(k); - if (i != cache.end()) { - cmp = i->second; + if (SEMA == POSIX) { + delete[] newprectbl; } - else { - cmp = history.compare_reversed(xh, yh, x.tag); - cache.insert(std::make_pair(k, cmp)); - } - - if (invert) cmp = -cmp; - return cmp < 0; } } // namespace re2c diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index 0c3f6d62..45a20949 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -27,11 +27,9 @@ struct Msg; struct dfa_t; struct tcmd_t; - typedef slab_allocator_t<1024 * 1024, sizeof(void*)> allocator_t; typedef int32_t prectable_t; - struct clos_t { nfa_state_t *state; @@ -52,14 +50,12 @@ struct clos_t static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; } }; - typedef std::vector closure_t; typedef closure_t::iterator clositer_t; typedef closure_t::const_iterator cclositer_t; typedef closure_t::reverse_iterator rclositer_t; typedef closure_t::const_reverse_iterator rcclositer_t; - struct newver_t { size_t tag; @@ -67,22 +63,19 @@ struct newver_t hidx_t history; }; - typedef std::map hc_cache_t; // 'hc' for history comparison typedef std::vector hc_caches_t; + +template struct newver_cmp_t { - tag_history_t &history; + history_t &history; hc_caches_t &caches; - newver_cmp_t(tag_history_t &h, hc_caches_t &c): history(h), caches(c) {} + newver_cmp_t(history_t &h, hc_caches_t &c): history(h), caches(c) {} bool operator()(const newver_t &, const newver_t &) const; }; - -typedef std::map newvers_t; - - struct kernel_t { size_t size; @@ -94,7 +87,6 @@ struct kernel_t FORBID_COPY(kernel_t); }; - struct kernel_buffers_t { size_t maxsize; @@ -111,13 +103,11 @@ struct kernel_buffers_t explicit kernel_buffers_t(allocator_t &alc); }; - struct cmp_gtop_t { inline bool operator() (const nfa_state_t *x, const nfa_state_t *y) const; }; - struct histleaf_t { uint32_t coreid; @@ -126,12 +116,11 @@ struct histleaf_t int32_t height; }; - typedef lookup_t kernels_t; typedef std::priority_queue , cmp_gtop_t> gtop_heap_t; - +template struct determ_context_t { typedef clos_t conf_t; @@ -140,6 +129,8 @@ struct determ_context_t typedef confset_t::const_iterator cconfiter_t; typedef confset_t::reverse_iterator rconfiter_t; typedef confset_t::const_reverse_iterator rcconfiter_t; + typedef typename history_type_t::type history_t; + typedef std::map > newvers_t; // determinization input const opt_t *dc_opts; // options @@ -157,7 +148,7 @@ struct determ_context_t uint32_t dc_symbol; // alphabet symbol of the current transition tcmd_t *dc_actions; // tag actions of the current transition tagver_table_t dc_tagvertbl; - tag_history_t history; // prefix trie of tag histories + history_t history; // prefix trie of tag histories kernels_t dc_kernels; // TDFA states under construction kernel_buffers_t dc_buffers; hc_caches_t dc_hc_caches; // per-tag cache of history comparisons @@ -195,15 +186,14 @@ struct determ_context_t FORBID_COPY(determ_context_t); }; -// maximum 29-bit (we have 30 bits, but highest must be non-negative) -static const int32_t MAX_RHO = 0x1fffFFFF; +typedef determ_context_t pdetctx_t; +typedef determ_context_t ldetctx_t; -void tagged_epsilon_closure(determ_context_t &ctx); -void closure_posix(determ_context_t &); -void closure_leftmost(determ_context_t &); -void find_state(determ_context_t &ctx); +template void tagged_epsilon_closure(ctx_t &ctx); +template void find_state(ctx_t &ctx); +void closure_leftmost(ldetctx_t &); -bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const +inline bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const { return x->topord < y->topord; } diff --git a/re2c/src/dfa/find_state.cc b/re2c/src/dfa/find_state.cc index 234d6942..464da642 100644 --- a/re2c/src/dfa/find_state.cc +++ b/re2c/src/dfa/find_state.cc @@ -74,34 +74,37 @@ namespace re2c * more complex analysis (and are not so useful after all), so we drop them. */ - +template struct kernel_eq_t { - determ_context_t &ctx; - bool operator()(const kernel_t *, const kernel_t *) const; + ctx_t &ctx; + bool operator()(const kernel_t *x, const kernel_t *y) const; }; - +template struct kernel_map_t { - determ_context_t &ctx; - bool operator()(const kernel_t *, const kernel_t *); + ctx_t &ctx; + bool operator()(const kernel_t *x, const kernel_t *y); }; - -static kernel_t *make_new_kernel(size_t, allocator_t &); -static kernel_t *make_kernel_copy(const kernel_t *, allocator_t &); -static void copy_to_buffer_kernel(const closure_t &, const prectable_t *, kernel_t *); -static void reserve_buffers(determ_context_t &); -static uint32_t hash_kernel(const kernel_t *kernel); -static bool equal_lookahead_tags(determ_context_t &, const kernel_t *, const kernel_t *); +template static bool do_find_state(ctx_t &ctx); +template static tcmd_t *final_actions(ctx_t &ctx, const clos_t &fin); +template static void reserve_buffers(ctx_t &ctx); +template static bool equal_lookahead_tags(ctx_t &ctx, const kernel_t *x, const kernel_t *y); +template static void unwind(const typename ctx_t::history_t &hist, tag_path_t &path, hidx_t idx); static void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector &count); -static void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx); -static bool do_find_state(determ_context_t &ctx); -static tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin); +static kernel_t *make_new_kernel(size_t size, allocator_t &alc); +static kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc); +static uint32_t hash_kernel(const kernel_t *kernel); +static void copy_to_buffer_kernel(const closure_t &closure, const prectable_t *prectbl, kernel_t *buffer); +// explicit specialization for context types +template void find_state(pdetctx_t &ctx); +template void find_state(ldetctx_t &ctx); -void find_state(determ_context_t &ctx) +template +void find_state(ctx_t &ctx) { dfa_t &dfa = ctx.dfa; @@ -138,8 +141,8 @@ void find_state(determ_context_t &ctx) DDUMP_DFA_RAW(ctx, is_new); } - -bool do_find_state(determ_context_t &ctx) +template +bool do_find_state(ctx_t &ctx) { kernels_t &kernels = ctx.dc_kernels; const closure_t &closure = ctx.state; @@ -162,13 +165,13 @@ bool do_find_state(determ_context_t &ctx) const uint32_t hash = hash_kernel(k); // try to find identical kernel - kernel_eq_t cmp_eq = {ctx}; + kernel_eq_t cmp_eq = {ctx}; ctx.dc_target = kernels.find_with(hash, k, cmp_eq); if (ctx.dc_target != kernels_t::NIL) return false; // else try to find mappable kernel // see note [bijective mappings] - kernel_map_t cmp_map = {ctx}; + kernel_map_t cmp_map = {ctx}; ctx.dc_target = kernels.find_with(hash, k, cmp_map); if (ctx.dc_target != kernels_t::NIL) return false; @@ -178,14 +181,14 @@ bool do_find_state(determ_context_t &ctx) return true; } - -tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) +template +tcmd_t *final_actions(ctx_t &ctx, const clos_t &fin) { dfa_t &dfa = ctx.dfa; const Rule &rule = dfa.rules[fin.state->rule]; const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers]; const hidx_t look = fin.thist; - const tag_history_t &thist = ctx.history; + const typename ctx_t::history_t &thist = ctx.history; tcpool_t &tcpool = dfa.tcpool; tcmd_t *copy = NULL, *save = NULL, **p; @@ -194,7 +197,7 @@ tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) const Tag &tag = dfa.tags[t]; if (fixed(tag)) continue; - const tagver_t v = abs(vers[t]), l = thist.last(look, t); + const tagver_t v = abs(vers[t]), l = last(thist, look, t); tagver_t &f = dfa.finvers[t]; if (l == TAGVER_ZERO) { copy = tcpool.make_copy(copy, f, v); @@ -212,7 +215,6 @@ tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin) return copy; } - kernel_buffers_t::kernel_buffers_t(allocator_t &alc) : maxsize(0) // usually ranges from one to some twenty , kernel(make_new_kernel(maxsize, alc)) @@ -226,7 +228,6 @@ kernel_buffers_t::kernel_buffers_t(allocator_t &alc) , backup_actions(NULL) {} - kernel_t *make_new_kernel(size_t size, allocator_t &alc) { kernel_t *k = alc.alloct(1); @@ -238,7 +239,6 @@ kernel_t *make_new_kernel(size_t size, allocator_t &alc) return k; } - kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc) { const size_t n = kernel->size; @@ -259,7 +259,6 @@ kernel_t *make_kernel_copy(const kernel_t *kernel, allocator_t &alc) return k; } - uint32_t hash_kernel(const kernel_t *kernel) { const size_t n = kernel->size; @@ -278,7 +277,6 @@ uint32_t hash_kernel(const kernel_t *kernel) return h; } - void copy_to_buffer_kernel(const closure_t &closure, const prectable_t *prectbl, kernel_t *buffer) { @@ -296,8 +294,8 @@ void copy_to_buffer_kernel(const closure_t &closure, } } - -void reserve_buffers(determ_context_t &ctx) +template +void reserve_buffers(ctx_t &ctx) { kernel_buffers_t &kbufs = ctx.dc_buffers; allocator_t &alc = ctx.dc_allocator; @@ -338,9 +336,8 @@ void reserve_buffers(determ_context_t &ctx) } } - -bool equal_lookahead_tags(determ_context_t &ctx - , const kernel_t *x, const kernel_t *y) +template +bool equal_lookahead_tags(ctx_t &ctx, const kernel_t *x, const kernel_t *y) { DASSERT(x->size == y->size); @@ -348,7 +345,7 @@ bool equal_lookahead_tags(determ_context_t &ctx return true; } - tag_history_t &thist = ctx.history; + typename ctx_t::history_t &thist = ctx.history; tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2, &p3 = ctx.dc_path3; std::vector &count = ctx.dc_tagcount; @@ -357,8 +354,8 @@ bool equal_lookahead_tags(determ_context_t &ctx if (xl == yl) continue; - unwind(thist, p1, xl); - unwind(thist, p2, yl); + unwind(thist, p1, xl); + unwind(thist, p2, yl); if (p1.size() != p2.size()) return false; @@ -371,7 +368,6 @@ bool equal_lookahead_tags(determ_context_t &ctx return true; } - void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector &count) { // counting sort with tag index as key @@ -396,8 +392,8 @@ void group_by_tag(tag_path_t &path, tag_path_t &buf, std::vector &coun path.swap(buf); } - -void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx) +template +void unwind(const typename ctx_t::history_t &hist, tag_path_t &path, hidx_t idx) { // Simple tags need only the last value, so in principle we could // increase the chance of mapping by recording only the last value. @@ -405,14 +401,14 @@ void unwind(const tag_history_t &hist, tag_path_t &path, hidx_t idx) // cases when it makes any difference are rare. path.clear(); for (; idx != HROOT; ) { - const tag_history_t::node_t &n = hist.node(idx); + const typename ctx_t::history_t::node_t &n = hist.node(idx); path.push_back(n.info); idx = n.pred; } } - -bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const +template +bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const { // check that kernel sizes, NFA states, tags versions, // lookahead tags and precedence table coincide @@ -424,8 +420,8 @@ bool kernel_eq_t::operator()(const kernel_t *x, const kernel_t *y) const && equal_lookahead_tags(ctx, x, y); } - -bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) +template +bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) { // check that kernel sizes, NFA states lookahead tags // and precedence table coincide (versions might differ) @@ -455,7 +451,7 @@ bool kernel_map_t::operator()(const kernel_t *x, const kernel_t *y) for (size_t t = 0; t < ntag; ++t) { // see note [mapping ignores items with lookahead tags] if (!history(tags[t]) - && ctx.history.last(xl, t) != TAGVER_ZERO) continue; + && last(ctx.history, xl, t) != TAGVER_ZERO) continue; const tagver_t xv = xvs[t], yv = yvs[t]; tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv]; diff --git a/re2c/src/dfa/posix_precedence.h b/re2c/src/dfa/posix_precedence.h index 607b62c1..8b8895bd 100644 --- a/re2c/src/dfa/posix_precedence.h +++ b/re2c/src/dfa/posix_precedence.h @@ -7,6 +7,9 @@ namespace re2c { +// maximum 29-bit (we have 30 bits, but highest must be non-negative) +static const int32_t MAX_RHO = 0x1fffFFFF; + inline int32_t unpack_longest(int32_t packed) { // take lower 30 bits and sign-extend @@ -86,7 +89,7 @@ int32_t precedence(ctx_t &ctx, const conf_t &x, const conf_t &y } const std::vector &tags = ctx.nfa.tags; - tag_history_t &hist = ctx.history; + typename ctx_t::history_t &hist = ctx.history; const bool fork_frame = orig1 == orig2; if (!fork_frame) { @@ -99,13 +102,13 @@ int32_t precedence(ctx_t &ctx, const conf_t &x, const conf_t &y int32_t i1 = idx1, i2 = idx2; for (; i1 != i2; ) { if (i1 > i2) { - const tag_history_t::node_t &n = hist.node(i1); + const typename ctx_t::history_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; } else { - const tag_history_t::node_t &n = hist.node(i2); + const typename ctx_t::history_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; @@ -134,7 +137,7 @@ void compute_prectable(ctx_t &ctx) { const typename ctx_t::confset_t &state = ctx.state; const std::vector &tags = ctx.nfa.tags; - tag_history_t &history = ctx.history; + typename ctx_t::history_t &history = ctx.history; const prectable_t *oldtbl = ctx.oldprectbl; prectable_t *newtbl = ctx.newprectbl; @@ -157,20 +160,20 @@ void compute_prectable(ctx_t &ctx) // array of boundaries in the sorted configuration array. uint32_t maxfin = 0; for (typename ctx_t::cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) { - uint32_t &x = history.node1(c->thist).finidx; - if (x >= USED) { - x = maxfin++; - fcount[x] = 0; + typename ctx_t::history_t::node_t &n = history.node(c->thist); + if (n.finidx >= USED) { + n.finidx = maxfin++; + fcount[n.finidx] = 0; // mark all nodes down to root as used (unless marked already) - for (int32_t i = history.node(c->thist).pred; i >= HROOT; ) { - uint32_t &y = history.node1(i).finidx; - if (y <= USED) break; - y = USED; - i = history.node(i).pred; + for (int32_t i = n.pred; i >= HROOT; ) { + typename ctx_t::history_t::node_t &m = history.node(i); + if (m.finidx <= USED) break; + m.finidx = USED; + i = m.pred; } } - ++fcount[x]; + ++fcount[n.finidx]; } fcount[maxfin] = 0; for (size_t i = 1; i <= maxfin; ++i) { @@ -178,7 +181,7 @@ void compute_prectable(ctx_t &ctx) } sortcores.resize(state.size()); for (uint32_t i = static_cast(newdim); i --> 0; ) { - sortcores[--fcount[history.node1(state[i].thist).finidx]] = i; + sortcores[--fcount[history.node(state[i].thist).finidx]] = i; } // Depth-first traversal of the history tree. During traversal we grow @@ -190,7 +193,7 @@ void compute_prectable(ctx_t &ctx) stack.push_back(0); while (!stack.empty()) { const int32_t n = stack.back(); - tag_history_t::node1_t &node = history.node1(n); + typename ctx_t::history_t::node_t &node = history.node(n); const uint32_t fidx = node.finidx; if (fidx == NONFIN) { @@ -201,14 +204,14 @@ void compute_prectable(ctx_t &ctx) if (node.next != -1) { // start or continue visiting subtrees rooted at this node - const tag_history_t::arc_t &arc = history.arc(node.next); + const typename ctx_t::history_t::arc_t &arc = history.arc(node.next); stack.push_back(arc.node); node.next = arc.next; continue; } // all subtrees visited, it's time to process this node - const int32_t h = n == 0 ? MAX_RHO : tags[history.node(n).info.idx].height; + const int32_t h = n == 0 ? MAX_RHO : tags[node.info.idx].height; li = level.rbegin(); le = level.rend(); @@ -250,7 +253,7 @@ void compute_prectable(ctx_t &ctx) // their precedence has already been computed and must not be touched. for (int32_t a = node.last; a != -1; ) { - const tag_history_t::arc_t &arc = history.arc(a); + const typename ctx_t::history_t::arc_t &arc = history.arc(a); a = arc.prev; // for all the items of this subtree diff --git a/re2c/src/dfa/tag_history.h b/re2c/src/dfa/tag_history.h index 094a559c..eb202123 100644 --- a/re2c/src/dfa/tag_history.h +++ b/re2c/src/dfa/tag_history.h @@ -20,17 +20,21 @@ const tag_info_t NOINFO = {0x3fffFFFF, 0}; static const uint32_t NONFIN = ~0u; static const uint32_t USED = NONFIN - 1; -// Different algorithms need to keep slightly different data in history. -// We store main data in one array, and auxilary data in separate arrays -// (this allows to avoid overhead in algorithms that don't need it). -struct tag_history_t +enum sema_t {POSIX, LEFTMOST}; +enum eval_t {STRICT, LAZY}; + +// tag history for POSIX semantics +struct phistory_t { struct node_t { tag_info_t info; hidx_t pred; + hidx_t last; + hidx_t next; + uint32_t finidx; - inline node_t(tag_info_t info, hidx_t pred) - : info(info), pred(pred) {} + inline node_t(tag_info_t info, hidx_t pred, hidx_t last, hidx_t next) + : info(info), pred(pred), last(last), next(next), finidx(NONFIN) {} }; struct arc_t { @@ -42,90 +46,136 @@ struct tag_history_t : node(node), prev(prev), next(next) {} }; - struct node1_t { - hidx_t last; - hidx_t next; - uint32_t finidx; + std::vector nodes; + std::vector arcs; - inline node1_t(hidx_t last, hidx_t next) - : last(last), next(next), finidx(NONFIN) {} - }; + inline phistory_t(): nodes(), arcs() { init(); } + inline void init(); + inline void detach(); + inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } + inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } + inline arc_t &arc(hidx_t i) { return arcs[static_cast(i)]; } + inline const arc_t &arc(hidx_t i) const { return arcs[static_cast(i)]; } + inline hidx_t push(hidx_t idx, tag_info_t info); + FORBID_COPY(phistory_t); +}; - struct node2_t { - uint32_t step; - uint32_t orig; +// tag history for leftmost greedy semantics +struct lhistory_t +{ + struct node_t { + tag_info_t info; + hidx_t pred; - inline node2_t(uint32_t step, uint32_t orig) - : step(step), orig(orig) {} + inline node_t(tag_info_t info, hidx_t pred) + : info(info), pred(pred) {} }; - // main history in the form of a backward-linked trie std::vector nodes; - // forward-linked history used by POSIX disambiguation - std::vector nodes1; - std::vector arcs; - - // auxilary data used by lazy POSIX disambiguation - std::vector nodes2; - - inline tag_history_t(); + inline lhistory_t(): nodes() { init(); } inline void init(); - inline void detach(); - inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } - inline node1_t &node1(hidx_t i) { return nodes1[static_cast(i)]; } - inline node2_t &node2(hidx_t i) { return nodes2[static_cast(i)]; } - inline arc_t &arc(hidx_t i) { return arcs[static_cast(i)]; } inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } - inline const node1_t &node1(hidx_t i) const { return nodes1[static_cast(i)]; } - inline const node2_t &node2(hidx_t i) const { return nodes2[static_cast(i)]; } - inline const arc_t &arc(hidx_t i) const { return arcs[static_cast(i)]; } - inline hidx_t push(hidx_t idx, tag_info_t info); - inline hidx_t push1(hidx_t idx, tag_info_t info); - inline hidx_t push2(hidx_t idx, uint32_t step, tag_info_t info, uint32_t orig); + FORBID_COPY(lhistory_t); +}; - inline tagver_t last(hidx_t i, size_t t) const; - inline int32_t compare_reversed(hidx_t x, hidx_t y, size_t t) const; +// tag history for lazy disambiguation (both POSIX and leftmost greedy) +struct zhistory_t +{ + struct node_t { + tag_info_t info; + hidx_t pred; + uint32_t step; + uint32_t orig; - FORBID_COPY(tag_history_t); + inline node_t(tag_info_t info, hidx_t pred, uint32_t step, uint32_t orig) + : info(info), pred(pred), step(step), orig(orig) {} + }; + + std::vector nodes; + + inline zhistory_t(): nodes() { init(); } + inline void init(); + inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } + inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } + inline hidx_t push(hidx_t idx, uint32_t step, tag_info_t info, uint32_t orig); + FORBID_COPY(zhistory_t); }; -tag_history_t::tag_history_t() - : nodes() - , nodes1() - , arcs() - , nodes2() +void phistory_t::init() { - init(); + nodes.clear(); + arcs.clear(); + nodes.push_back(node_t(NOINFO, -1, -1, -1)); } -void tag_history_t::init() +void lhistory_t::init() { nodes.clear(); - nodes1.clear(); - arcs.clear(); - nodes2.clear(); - nodes.push_back(node_t(NOINFO, -1)); - nodes1.push_back(node1_t(-1, -1)); - nodes2.push_back(node2_t(0, 0)); } -void tag_history_t::detach() +void zhistory_t::init() +{ + nodes.clear(); + nodes.push_back(node_t(NOINFO, -1, 0, 0)); +} + +void phistory_t::detach() { // don't delete existing tree, just detach it from root // pointers to old tree are still valid, but traversals will ignore it - node1_t &n = node1(0); + node_t &n = node(0); n.last = n.next = -1; n.finidx = NONFIN; } -tagver_t tag_history_t::last(hidx_t i, size_t t) const +int32_t phistory_t::push(int32_t idx, tag_info_t info) +{ + const int32_t i = static_cast(nodes.size()); + if (idx != -1) { + node_t &n = node(idx); + const int32_t a = static_cast(arcs.size()); + arcs.push_back(arc_t(i, n.last, -1)); + if (n.next == -1) { + n.next = a; + } + else { + arc(n.last).next = a; + } + n.last = a; + } + nodes.push_back(node_t(info, idx, -1, -1)); + return i; +} + +int32_t lhistory_t::push(int32_t idx, tag_info_t info) +{ + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(info, idx)); + return i; +} + +int32_t zhistory_t::push(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) +{ + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(info, idx, step, orig)); + return i; +} + +// history type selector +template struct history_type_t; +template<> struct history_type_t {typedef phistory_t type;}; +template<> struct history_type_t {typedef lhistory_t type;}; +template struct history_type_t {typedef zhistory_t type;}; + +template +tagver_t last(const history_t &h, hidx_t i, size_t t) { for (; i != HROOT; ) { - const node_t &n = node(i); + const typename history_t::node_t &n = h.node(i); if (n.info.idx == t) { return n.info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; } @@ -134,21 +184,22 @@ tagver_t tag_history_t::last(hidx_t i, size_t t) const return TAGVER_ZERO; } -int32_t tag_history_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const +template +int32_t compare_reversed(const history_t &h, hidx_t x, hidx_t y, size_t t) { if (x == y) return 0; // compare in reverse, from tail to head: direction makes // no difference when comparing for exact coincidence for (;;) { - for (; x != HROOT && node(x).info.idx != t; x = node(x).pred); - for (; y != HROOT && node(y).info.idx != t; y = node(y).pred); + for (; x != HROOT && h.node(x).info.idx != t; x = h.node(x).pred); + for (; y != HROOT && h.node(y).info.idx != t; y = h.node(y).pred); if (x == y) return 0; if (x == HROOT) return -1; if (y == HROOT) return 1; - const node_t &nx = node(x), &ny = node(y); + const typename history_t::node_t &nx = h.node(x), &ny = h.node(y); if (nx.info.neg > ny.info.neg) return -1; if (nx.info.neg < ny.info.neg) return 1; @@ -158,41 +209,6 @@ int32_t tag_history_t::compare_reversed(hidx_t x, hidx_t y, size_t t) const } } -int32_t tag_history_t::push(int32_t idx, tag_info_t info) -{ - const int32_t i = static_cast(nodes.size()); - nodes.push_back(node_t(info, idx)); - return i; -} - -int32_t tag_history_t::push1(int32_t idx, tag_info_t info) -{ - const int32_t i = static_cast(nodes.size()); - if (idx != -1) { - node1_t &n = node1(idx); - const int32_t a = static_cast(arcs.size()); - arcs.push_back(arc_t(i, n.last, -1)); - if (n.next == -1) { - n.next = a; - } - else { - arc(n.last).next = a; - } - n.last = a; - } - nodes.push_back(node_t(info, idx)); - nodes1.push_back(node1_t(-1, -1)); - return i; -} - -int32_t tag_history_t::push2(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) -{ - const int32_t i = static_cast(nodes.size()); - nodes.push_back(node_t(info, idx)); - nodes2.push_back(node2_t(step, orig)); - return i; -} - } // namespace re2c #endif // _RE2C_DFA_TAG_HISTORY_ diff --git a/re2c/src/dfa/tcmd.cc b/re2c/src/dfa/tcmd.cc index 3746924a..20385c92 100644 --- a/re2c/src/dfa/tcmd.cc +++ b/re2c/src/dfa/tcmd.cc @@ -34,10 +34,14 @@ namespace re2c * The algorithm starts and ends with all-zero in-degree buffer. */ +// explicit specialization for history types +template tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs + , tagver_t rhs, const phistory_t &history, hidx_t hidx, size_t tag); +template tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs + , tagver_t rhs, const lhistory_t &history, hidx_t hidx, size_t tag); static uint32_t hash_tcmd(const tcmd_t *tcmd); - bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y) { return x.lhs == y.lhs @@ -45,7 +49,6 @@ bool tcmd_t::equal(const tcmd_t &x, const tcmd_t &y) && equal_history(x.history, y.history); } - bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g) { for (;;) { @@ -55,13 +58,11 @@ bool tcmd_t::equal_history(const tagver_t *h, const tagver_t *g) } } - bool tcmd_t::iscopy(const tcmd_t *x) { return x->rhs != TAGVER_ZERO && x->history[0] == TAGVER_ZERO; } - bool tcmd_t::isset(const tcmd_t *x) { if (x->rhs == TAGVER_ZERO) { @@ -71,13 +72,11 @@ bool tcmd_t::isset(const tcmd_t *x) return false; } - bool tcmd_t::isadd(const tcmd_t *x) { return x->rhs != TAGVER_ZERO && x->history[0] != TAGVER_ZERO; } - bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg) { tcmd_t *x0 = *phead, *x, *y0 = NULL, **py; @@ -122,7 +121,6 @@ bool tcmd_t::topsort(tcmd_t **phead, uint32_t *indeg) return nontrivial_cycles; } - tcpool_t::tcpool_t() : alc() , index() @@ -132,7 +130,6 @@ tcpool_t::tcpool_t() DASSERT(TCID0 == insert(NULL)); } - tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs) { tcmd_t *p = alc.alloct(1); @@ -143,7 +140,6 @@ tcmd_t *tcpool_t::make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs) return p; } - tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set) { const size_t size = sizeof(tcmd_t) + sizeof(tagver_t); @@ -156,13 +152,13 @@ tcmd_t *tcpool_t::make_set(tcmd_t *next, tagver_t lhs, tagver_t set) return p; } - +template tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, - const tag_history_t &history, hidx_t hidx, size_t tag) + const history_t &history, hidx_t hidx, size_t tag) { size_t hlen = 0; for (hidx_t i = hidx; i != HROOT; ) { - const tag_history_t::node_t &n = history.node(i); + const typename history_t::node_t &n = history.node(i); if (n.info.idx == tag) ++hlen; i = n.pred; } @@ -174,7 +170,7 @@ tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, p->rhs = rhs; tagver_t *h = p->history; for (hidx_t i = hidx; i != HROOT; ) { - const tag_history_t::node_t &n = history.node(i); + const typename history_t::node_t &n = history.node(i); if (n.info.idx == tag) { *h++ = n.info.neg ? TAGVER_BOTTOM : TAGVER_CURSOR; } @@ -184,7 +180,6 @@ tcmd_t *tcpool_t::make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, return p; } - tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history) { @@ -200,7 +195,6 @@ tcmd_t *tcpool_t::copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, return p; } - uint32_t hash_tcmd(const tcmd_t *tcmd) { uint32_t h = 0; @@ -212,7 +206,6 @@ uint32_t hash_tcmd(const tcmd_t *tcmd) return h; } - struct tcmd_eq_t { bool operator()(const tcmd_t *x, const tcmd_t *y) const @@ -227,7 +220,6 @@ struct tcmd_eq_t } }; - tcid_t tcpool_t::insert(const tcmd_t *tcmd) { const uint32_t h = hash_tcmd(tcmd); @@ -241,7 +233,6 @@ tcid_t tcpool_t::insert(const tcmd_t *tcmd) return static_cast(id); } - const tcmd_t *tcpool_t::operator[](tcid_t id) const { return index[id]; diff --git a/re2c/src/dfa/tcmd.h b/re2c/src/dfa/tcmd.h index ab68e5e4..159b8ac1 100644 --- a/re2c/src/dfa/tcmd.h +++ b/re2c/src/dfa/tcmd.h @@ -43,7 +43,7 @@ public: tcpool_t(); tcmd_t *make_copy(tcmd_t *next, tagver_t lhs, tagver_t rhs); tcmd_t *make_set(tcmd_t *next, tagver_t lhs, tagver_t set); - tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tag_history_t &history, hidx_t hidx, size_t tag); + template tcmd_t *make_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const history_t &history, hidx_t hidx, size_t tag); tcmd_t *copy_add(tcmd_t *next, tagver_t lhs, tagver_t rhs, const tagver_t *history); tcid_t insert(const tcmd_t *tcmd); const tcmd_t *operator[](tcid_t id) const;