From 21a3df383bc9c29711bc1ec4786d111e6e2907d0 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 8 Feb 2019 16:40:47 +0000 Subject: [PATCH] Use per-tag cache of history comparisons to avoid repeated computation. --- re2c/src/dfa/closure.cc | 2 +- re2c/src/dfa/determinization.cc | 44 +++++++++++++++++++++++++++++---- re2c/src/dfa/determinization.h | 11 ++++++--- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/re2c/src/dfa/closure.cc b/re2c/src/dfa/closure.cc index f5620ef6..e28070fd 100644 --- a/re2c/src/dfa/closure.cc +++ b/re2c/src/dfa/closure.cc @@ -165,7 +165,7 @@ void generate_versions(determ_context_t &ctx) newvers_t &newvers = ctx.dc_newvers; clositer_t b = clos.begin(), e = clos.end(), c; - newver_cmp_t cmp(thist); + newver_cmp_t cmp(thist, ctx.dc_hc_caches); newvers_t newacts(cmp); tcmd_t *cmd = NULL; diff --git a/re2c/src/dfa/determinization.cc b/re2c/src/dfa/determinization.cc index 96a0d8fc..0487d5cf 100644 --- a/re2c/src/dfa/determinization.cc +++ b/re2c/src/dfa/determinization.cc @@ -22,6 +22,7 @@ namespace re2c { +static void clear_caches(determ_context_t &ctx); static void reach_on_symbol(determ_context_t &); static nfa_state_t *transition(nfa_state_t *, uint32_t); static uint32_t init_tag_versions(determ_context_t &); @@ -58,9 +59,8 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn // build tagged epsilon-closure of all reachable NFA states, // then find identical or mappable DFA state or add a new one for (uint32_t i = 0; i < ctx.dc_kernels.size(); ++i) { - ctx.dc_origin = i; - ctx.dc_newvers.clear(); + clear_caches(ctx); for (uint32_t c = 0; c < nchars; ++c) { ctx.dc_symbol = c; @@ -75,6 +75,17 @@ dfa_t::dfa_t(const nfa_t &nfa, const opt_t *opts, const std::string &cond, Warn } +void clear_caches(determ_context_t &ctx) +{ + ctx.dc_newvers.clear(); + + const size_t ntags = ctx.dc_nfa.tags.size(); + for (size_t t = 0; t < ntags; ++t) { + ctx.dc_hc_caches[t].clear(); + } +} + + void reach_on_symbol(determ_context_t &ctx) { const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin]; @@ -216,10 +227,11 @@ determ_context_t::determ_context_t(const opt_t *opts, Warn &warn , dc_taghistory() , dc_kernels() , dc_buffers(dc_allocator) - , dc_newvers(newver_cmp_t(dc_taghistory)) , dc_stack_topsort() , dc_stack_linear() , dc_stack_dfs() + , dc_hc_caches() + , dc_newvers(newver_cmp_t(dc_taghistory, dc_hc_caches)) , dc_path1() , dc_path2() , dc_path3() @@ -228,6 +240,7 @@ determ_context_t::determ_context_t(const opt_t *opts, Warn &warn , dc_clstats() { const size_t ntags = nfa.tags.size(); + dc_hc_caches.resize(ntags); dc_path1.reserve(ntags); dc_path2.reserve(ntags); dc_path3.reserve(ntags); @@ -247,7 +260,7 @@ dfa_t::~dfa_t() } -bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const +bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) { if (x.tag < y.tag) return true; if (x.tag > y.tag) return false; @@ -255,7 +268,28 @@ bool newver_cmp_t::operator()(const newver_t &x, const newver_t &y) const if (x.base < y.base) return true; if (x.base > y.base) return false; - return history.compare_reversed(x.history, y.history, x.tag) < 0; + hidx_t xh = x.history, yh = y.history; + if (xh == yh) return false; + + hc_cache_t &cache = caches[x.tag]; + int32_t cmp; + + bool invert = xh > yh; + if (invert) std::swap(xh, yh); + + const uint64_t k = (static_cast(xh) << 32) | yh; + + hc_cache_t::const_iterator i = cache.find(k); + if (i != cache.end()) { + cmp = i->second; + } + else { + cmp = history.compare_reversed(xh, yh, x.tag); + cache.insert(std::make_pair(k, cmp)); + } + + if (invert) cmp = -cmp; + return cmp < 0; } } // namespace re2c diff --git a/re2c/src/dfa/determinization.h b/re2c/src/dfa/determinization.h index eecf2ba3..6a3cd12b 100644 --- a/re2c/src/dfa/determinization.h +++ b/re2c/src/dfa/determinization.h @@ -58,12 +58,15 @@ struct newver_t }; +typedef std::map hc_cache_t; // 'hc' for history comparison +typedef std::vector hc_caches_t; struct newver_cmp_t { tag_history_t &history; + hc_caches_t &caches; - explicit newver_cmp_t(tag_history_t &h) : history(h) {} - bool operator()(const newver_t &, const newver_t &) const; + newver_cmp_t(tag_history_t &h, hc_caches_t &c): history(h), caches(c) {} + bool operator()(const newver_t &, const newver_t &); }; @@ -126,11 +129,11 @@ struct determ_context_t tag_history_t dc_taghistory; // prefix trie of tag histories kernels_t dc_kernels; // TDFA states under construction kernel_buffers_t dc_buffers; - newvers_t dc_newvers; std::stack dc_stack_topsort; std::stack dc_stack_linear; std::stack dc_stack_dfs; - + hc_caches_t dc_hc_caches; // per-tag cache of history comparisons + newvers_t dc_newvers; // map of triples (tag, version, history) to new version tag_path_t dc_path1; // buffer 1 for tag history tag_path_t dc_path2; // buffer 2 for tag history tag_path_t dc_path3; // buffer 3 for tag history -- 2.40.0