From d7ad1cd6e68222e230bc962b1141a2522976162a Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 13 Jul 2019 11:32:04 +0100 Subject: [PATCH] Fixed Clang warning -Wunused-template. --- lib/regex_impl.h | 51 +++++++++++ lib/regexec_nfa_posix_trie.cc | 159 +++++++++++++++++++++++++++++++++- src/dfa/posix_precedence.h | 156 --------------------------------- src/dfa/tag_history.h | 51 ----------- src/util/string_utils.h | 8 +- 5 files changed, 212 insertions(+), 213 deletions(-) diff --git a/lib/regex_impl.h b/lib/regex_impl.h index 23f1d75f..39122ae7 100644 --- a/lib/regex_impl.h +++ b/lib/regex_impl.h @@ -95,6 +95,42 @@ struct simctx_t FORBID_COPY(simctx_t); }; +// tag history for lazy disambiguation (both POSIX and leftmost greedy) +struct zhistory_t +{ + struct node_t { + tag_info_t info; + hidx_t pred; + uint32_t orig; + uint32_t step; + + inline node_t(tag_info_t info, hidx_t pred, uint32_t orig, uint32_t step) + : info(info), pred(pred), orig(orig), step(step) {} + }; + + struct cache_entry_t + { + int32_t prec1; + int32_t prec2; + int32_t prec; + }; + typedef std::map cache_t; + + std::vector nodes; + cache_t cache; + + inline zhistory_t(): nodes(), cache() { init(); } + inline void init(); + inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } + inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } + template inline hidx_t link(ctx_t &ctx + , const typename ctx_t::conf_t &conf); + template static int32_t precedence(ctx_t &ctx + , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y + , int32_t &prec1, int32_t &prec2); + FORBID_COPY(zhistory_t); +}; + // tag history for Kuklewicz disambiguation (POSIX semantics) struct khistory_t { @@ -355,12 +391,27 @@ bool ran_or_fin_t::operator()(const conf_t &c) || c.state->type == nfa_state_t::FIN; } +void zhistory_t::init() +{ + nodes.clear(); + nodes.push_back(node_t(NOINFO, -1, 0, 0)); + cache.clear(); +} + void khistory_t::init() { nodes.clear(); nodes.push_back(node_t(NOINFO, -1)); } +template +hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf) +{ + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step)); + return i; +} + template hidx_t khistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf) { diff --git a/lib/regexec_nfa_posix_trie.cc b/lib/regexec_nfa_posix_trie.cc index 90e9ec03..beb05cd9 100644 --- a/lib/regexec_nfa_posix_trie.cc +++ b/lib/regexec_nfa_posix_trie.cc @@ -19,9 +19,44 @@ template<> void init_gor1(libre2c::pzsimctx_t &ctx); namespace libre2c { -static inline void closure_posix(pzsimctx_t &ctx); +/* note [lazy computation and caching of precedence] + * + * Eagerly computing precedence values on each step for each pair of closure + * states is a waste of time: most of these values are not needed, because + * RE may be unambigous, or the given input string may be unambigous, or even + * if there is ambiguity, it may take only a few comparisons to resolve. All + * the rest is wasted effort. + * + * We can avoid it by delaying precedence computation until necessary, and + * then unwinding all the steps backwards, computing precedence for each step + * and caching the computed values (so that the same pair of histories is not + * compared twice). It is still the same incremental comparison as with + * precedence matrices: we compare step by step, going from the fork frame to + * the join frame. The result of comparison on each step is folded to a triple + * of numbers and recorded in cache. It is important that we do record each + * step, not just the final step, because the next pair of ambiguous histories + * may unwind to the same pair of prefixes that was compared before. + * + * For all this to work, it is necessary that we are able to store all history + * until the end, because at any step we might need to unwind an arbitrary + * number of steps back. We also need to address individual subhistories + * efficiently in order to use them as keys in the cache. All this is achieved + * by storing history in the form of a trie and addressing individual + * histories by indices in that trie. We also use trie to compute the final + * tag values (instead of storing tags in registers at each step). + */ + static void make_step(pzsimctx_t &, uint32_t); static void make_final_step(pzsimctx_t &); +template static int32_t zprecedence1(ctx_t &ctx + , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2); +template static int32_t zprecedence2(ctx_t &ctx + , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2); + +// we *do* want this to be inlined +static inline uint32_t get_step(const zhistory_t &hist, int32_t idx); +static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx); +static inline void closure_posix(pzsimctx_t &ctx); int regexec_nfa_posix_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -104,6 +139,128 @@ void make_final_step(pzsimctx_t &ctx) } } +template +int32_t zhistory_t::precedence(ctx_t &ctx + , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y + , int32_t &prec1, int32_t &prec2) +{ + return zprecedence1(ctx, x.thist, y.thist, prec1, prec2); +} + +// see note [lazy computation and caching of precedence] +template +int32_t zprecedence1(ctx_t &ctx, hidx_t idx1, hidx_t idx2 + , int32_t &prec1, int32_t &prec2) +{ + zhistory_t::cache_t &cache = ctx.history.cache; + int32_t prec = 0; + + // use the same cache entry for (x, y) and (y, x) + uint32_t k1 = static_cast(idx1), k2 = static_cast(idx2); + bool invert = k2 < k1; + if (invert) std::swap(k1, k2); + const uint64_t key = (static_cast(k1) << 32) | k2; + + zhistory_t::cache_t::const_iterator i = cache.find(key); + if (i != cache.end()) { + // use previously computed precedence values from cache + const zhistory_t::cache_entry_t &val = i->second; + prec1 = val.prec1; + prec2 = val.prec2; + prec = val.prec; + if (invert) { + std::swap(prec1, prec2); + prec = -prec; + } + } + else { + // compute precedence values and put them into cache + prec = zprecedence2(ctx, idx1, idx2, prec1, prec2); + zhistory_t::cache_entry_t val = {prec1, prec2, prec}; + if (invert) { + std::swap(val.prec1, val.prec2); + val.prec = -val.prec; + } + cache.insert(std::make_pair(key, val)); + } + + return prec; +} + +// see note [lazy computation and caching of precedence] +template +int32_t zprecedence2(ctx_t &ctx, hidx_t idx1, hidx_t idx2 + , int32_t &prec1, int32_t &prec2) +{ + if (idx1 == idx2) { + prec1 = prec2 = MAX_RHO; + return 0; + } + + const std::vector &tags = ctx.nfa.tags; + zhistory_t &hist = ctx.history; + + int32_t prec = 0; + prec1 = prec2 = MAX_RHO; + + int32_t i1 = idx1, i2 = idx2; + + const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2); + + uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2); + const uint32_t s = std::max(s1, s2); + + const bool fork_frame = o1 == o2 && s1 == s2; + + tag_info_t info1 = NOINFO, info2 = NOINFO; + for (; i1 != i2 && (s1 >= s || s2 >= s);) { + if (s1 >= s && (i1 > i2 || s2 < s)) { + const zhistory_t::node_t &n = hist.node(i1); + info1 = n.info; + prec1 = std::min(prec1, tags[info1.idx].height); + i1 = n.pred; + s1 = get_step(hist, i1); + } + else { + const zhistory_t::node_t &n = hist.node(i2); + info2 = n.info; + prec2 = std::min(prec2, tags[info2.idx].height); + i2 = n.pred; + s2 = get_step(hist, i2); + } + } + if (!fork_frame) { + // recurse into previous steps (via cache) + int32_t p1, p2; + prec = zprecedence1(ctx, i1, i2, p1, p2); + prec1 = std::min(prec1, p1); + prec2 = std::min(prec2, p2); + } + else if (i1 != HROOT) { + const int32_t h = tags[hist.node(i1).info.idx].height; + prec1 = std::min(prec1, h); + prec2 = std::min(prec2, h); + } + + // longest precedence + if (prec1 > prec2) return -1; + if (prec1 < prec2) return 1; + + // leftmost precedence + return !fork_frame ? prec + : leftprec(info1, info2, i1 == idx1, i2 == idx2); +} + +uint32_t get_step(const zhistory_t &hist, int32_t idx) +{ + return idx == HROOT ? 0 : hist.node(idx).step; +} + +uint32_t get_orig(const zhistory_t &hist, int32_t idx) +{ + return idx == HROOT ? 0 : hist.node(idx).orig; +} + } // namespace libre2c template<> void init_gor1(libre2c::pzsimctx_t &ctx) diff --git a/src/dfa/posix_precedence.h b/src/dfa/posix_precedence.h index ab68ee80..f9b3680c 100644 --- a/src/dfa/posix_precedence.h +++ b/src/dfa/posix_precedence.h @@ -8,48 +8,14 @@ namespace re2c { -/* note [lazy computation and caching of precedence] - * - * Eagerly computing precedence values on each step for each pair of closure - * states is a waste of time: most of these values are not needed, because - * RE may be unambigous, or the given input string may be unambigous, or even - * if there is ambiguity, it may take only a few comparisons to resolve. All - * the rest is wasted effort. - * - * We can avoid it by delaying precedence computation until necessary, and - * then unwinding all the steps backwards, computing precedence for each step - * and caching the computed values (so that the same pair of histories is not - * compared twice). It is still the same incremental comparison as with - * precedence matrices: we compare step by step, going from the fork frame to - * the join frame. The result of comparison on each step is folded to a triple - * of numbers and recorded in cache. It is important that we do record each - * step, not just the final step, because the next pair of ambiguous histories - * may unwind to the same pair of prefixes that was compared before. - * - * For all this to work, it is necessary that we are able to store all history - * until the end, because at any step we might need to unwind an arbitrary - * number of steps back. We also need to address individual subhistories - * efficiently in order to use them as keys in the cache. All this is achieved - * by storing history in the form of a trie and addressing individual - * histories by indices in that trie. We also use trie to compute the final - * tag values (instead of storing tags in registers at each step). - */ - // maximum 29-bit (we have 30 bits, but highest must be non-negative) static const int32_t MAX_RHO = 0x1fffFFFF; -template static int32_t zprecedence1(ctx_t &ctx - , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2); -template static int32_t zprecedence2(ctx_t &ctx - , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2); - // we *do* want this to be inlined static inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2); static inline int32_t unpack_longest(int32_t packed); static inline int32_t unpack_leftmost(int32_t packed); static inline int32_t pack(int32_t longest, int32_t leftmost); -static inline uint32_t get_step(const zhistory_t &hist, int32_t idx); -static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx); template int32_t phistory_t::precedence(ctx_t &ctx @@ -315,118 +281,6 @@ void compute_prectable(ctx_t &ctx) } } -template -int32_t zhistory_t::precedence(ctx_t &ctx - , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y - , int32_t &prec1, int32_t &prec2) -{ - return zprecedence1(ctx, x.thist, y.thist, prec1, prec2); -} - -// see note [lazy computation and caching of precedence] -template -int32_t zprecedence1(ctx_t &ctx, hidx_t idx1, hidx_t idx2 - , int32_t &prec1, int32_t &prec2) -{ - zhistory_t::cache_t &cache = ctx.history.cache; - int32_t prec = 0; - - // use the same cache entry for (x, y) and (y, x) - uint32_t k1 = static_cast(idx1), k2 = static_cast(idx2); - bool invert = k2 < k1; - if (invert) std::swap(k1, k2); - const uint64_t key = (static_cast(k1) << 32) | k2; - - zhistory_t::cache_t::const_iterator i = cache.find(key); - if (i != cache.end()) { - // use previously computed precedence values from cache - const zhistory_t::cache_entry_t &val = i->second; - prec1 = val.prec1; - prec2 = val.prec2; - prec = val.prec; - if (invert) { - std::swap(prec1, prec2); - prec = -prec; - } - } - else { - // compute precedence values and put them into cache - prec = zprecedence2(ctx, idx1, idx2, prec1, prec2); - zhistory_t::cache_entry_t val = {prec1, prec2, prec}; - if (invert) { - std::swap(val.prec1, val.prec2); - val.prec = -val.prec; - } - cache.insert(std::make_pair(key, val)); - } - - return prec; -} - -// see note [lazy computation and caching of precedence] -template -int32_t zprecedence2(ctx_t &ctx, hidx_t idx1, hidx_t idx2 - , int32_t &prec1, int32_t &prec2) -{ - if (idx1 == idx2) { - prec1 = prec2 = MAX_RHO; - return 0; - } - - const std::vector &tags = ctx.nfa.tags; - zhistory_t &hist = ctx.history; - - int32_t prec = 0; - prec1 = prec2 = MAX_RHO; - - int32_t i1 = idx1, i2 = idx2; - - const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2); - - uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2); - const uint32_t s = std::max(s1, s2); - - const bool fork_frame = o1 == o2 && s1 == s2; - - tag_info_t info1 = NOINFO, info2 = NOINFO; - for (; i1 != i2 && (s1 >= s || s2 >= s);) { - if (s1 >= s && (i1 > i2 || s2 < s)) { - const zhistory_t::node_t &n = hist.node(i1); - info1 = n.info; - prec1 = std::min(prec1, tags[info1.idx].height); - i1 = n.pred; - s1 = get_step(hist, i1); - } - else { - const zhistory_t::node_t &n = hist.node(i2); - info2 = n.info; - prec2 = std::min(prec2, tags[info2.idx].height); - i2 = n.pred; - s2 = get_step(hist, i2); - } - } - if (!fork_frame) { - // recurse into previous steps (via cache) - int32_t p1, p2; - prec = zprecedence1(ctx, i1, i2, p1, p2); - prec1 = std::min(prec1, p1); - prec2 = std::min(prec2, p2); - } - else if (i1 != HROOT) { - const int32_t h = tags[hist.node(i1).info.idx].height; - prec1 = std::min(prec1, h); - prec2 = std::min(prec2, h); - } - - // longest precedence - if (prec1 > prec2) return -1; - if (prec1 < prec2) return 1; - - // leftmost precedence - return !fork_frame ? prec - : leftprec(info1, info2, i1 == idx1, i2 == idx2); -} - int32_t unpack_longest(int32_t packed) { // take lower 30 bits and sign-extend @@ -455,16 +309,6 @@ int32_t pack(int32_t longest, int32_t leftmost) return packed; } -uint32_t get_step(const zhistory_t &hist, int32_t idx) -{ - return idx == HROOT ? 0 : hist.node(idx).step; -} - -uint32_t get_orig(const zhistory_t &hist, int32_t idx) -{ - return idx == HROOT ? 0 : hist.node(idx).orig; -} - } // namespace re2c #endif // _RE2C_DFA_POSIX_PRECEDENCE_ diff --git a/src/dfa/tag_history.h b/src/dfa/tag_history.h index c14aebb4..aed19d8f 100644 --- a/src/dfa/tag_history.h +++ b/src/dfa/tag_history.h @@ -84,42 +84,6 @@ struct lhistory_t FORBID_COPY(lhistory_t); }; -// tag history for lazy disambiguation (both POSIX and leftmost greedy) -struct zhistory_t -{ - struct node_t { - tag_info_t info; - hidx_t pred; - uint32_t orig; - uint32_t step; - - inline node_t(tag_info_t info, hidx_t pred, uint32_t orig, uint32_t step) - : info(info), pred(pred), orig(orig), step(step) {} - }; - - struct cache_entry_t - { - int32_t prec1; - int32_t prec2; - int32_t prec; - }; - typedef std::map cache_t; - - std::vector nodes; - cache_t cache; - - inline zhistory_t(): nodes(), cache() { init(); } - inline void init(); - inline node_t &node(hidx_t i) { return nodes[static_cast(i)]; } - inline const node_t &node(hidx_t i) const { return nodes[static_cast(i)]; } - template inline hidx_t link(ctx_t &ctx - , const typename ctx_t::conf_t &conf); - template static int32_t precedence(ctx_t &ctx - , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y - , int32_t &prec1, int32_t &prec2); - FORBID_COPY(zhistory_t); -}; - void phistory_t::init() { nodes.clear(); @@ -133,13 +97,6 @@ void lhistory_t::init() nodes.push_back(node_t(NOINFO, -1)); } -void zhistory_t::init() -{ - nodes.clear(); - nodes.push_back(node_t(NOINFO, -1, 0, 0)); - cache.clear(); -} - void phistory_t::detach() { // don't delete existing tree, just detach it from root @@ -178,14 +135,6 @@ hidx_t lhistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf) return i; } -template -hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf) -{ - const int32_t i = static_cast(nodes.size()); - nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step)); - return i; -} - template tagver_t last(const history_t &h, hidx_t i, size_t t) { diff --git a/src/util/string_utils.h b/src/util/string_utils.h index d0266639..d9c2c95f 100644 --- a/src/util/string_utils.h +++ b/src/util/string_utils.h @@ -7,10 +7,8 @@ namespace re2c { -template void strrreplace( - std::string &s, - const std::string &s1, - const type_t &v) +template +void strrreplace(std::string &s, const std::string &s1, const type_t &v) { std::ostringstream sv; sv << v; @@ -26,7 +24,7 @@ template void strrreplace( } template -static std::string to_string(const T &v) +std::string to_string(const T &v) { std::ostringstream s; s << v; -- 2.40.0