FORBID_COPY(simctx_t);
};
+// tag history for lazy disambiguation (both POSIX and leftmost greedy)
+struct zhistory_t
+{
+ struct node_t {
+ tag_info_t info;
+ hidx_t pred;
+ uint32_t orig;
+ uint32_t step;
+
+ inline node_t(tag_info_t info, hidx_t pred, uint32_t orig, uint32_t step)
+ : info(info), pred(pred), orig(orig), step(step) {}
+ };
+
+ struct cache_entry_t
+ {
+ int32_t prec1;
+ int32_t prec2;
+ int32_t prec;
+ };
+ typedef std::map<uint64_t, cache_entry_t> cache_t;
+
+ std::vector<node_t> nodes;
+ cache_t cache;
+
+ inline zhistory_t(): nodes(), cache() { init(); }
+ inline void init();
+ inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
+ inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
+ template<typename ctx_t> inline hidx_t link(ctx_t &ctx
+ , const typename ctx_t::conf_t &conf);
+ template<typename ctx_t> static int32_t precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
+ , int32_t &prec1, int32_t &prec2);
+ FORBID_COPY(zhistory_t);
+};
+
// tag history for Kuklewicz disambiguation (POSIX semantics)
struct khistory_t
{
|| c.state->type == nfa_state_t::FIN;
}
+void zhistory_t::init()
+{
+ nodes.clear();
+ nodes.push_back(node_t(NOINFO, -1, 0, 0));
+ cache.clear();
+}
+
void khistory_t::init()
{
nodes.clear();
nodes.push_back(node_t(NOINFO, -1));
}
+template<typename ctx_t>
+hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf)
+{
+ const int32_t i = static_cast<int32_t>(nodes.size());
+ nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step));
+ return i;
+}
+
template<typename ctx_t>
hidx_t khistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf)
{
namespace libre2c {
-static inline void closure_posix(pzsimctx_t &ctx);
+/* note [lazy computation and caching of precedence]
+ *
+ * Eagerly computing precedence values on each step for each pair of closure
+ * states is a waste of time: most of these values are not needed, because
+ * RE may be unambigous, or the given input string may be unambigous, or even
+ * if there is ambiguity, it may take only a few comparisons to resolve. All
+ * the rest is wasted effort.
+ *
+ * We can avoid it by delaying precedence computation until necessary, and
+ * then unwinding all the steps backwards, computing precedence for each step
+ * and caching the computed values (so that the same pair of histories is not
+ * compared twice). It is still the same incremental comparison as with
+ * precedence matrices: we compare step by step, going from the fork frame to
+ * the join frame. The result of comparison on each step is folded to a triple
+ * of numbers and recorded in cache. It is important that we do record each
+ * step, not just the final step, because the next pair of ambiguous histories
+ * may unwind to the same pair of prefixes that was compared before.
+ *
+ * For all this to work, it is necessary that we are able to store all history
+ * until the end, because at any step we might need to unwind an arbitrary
+ * number of steps back. We also need to address individual subhistories
+ * efficiently in order to use them as keys in the cache. All this is achieved
+ * by storing history in the form of a trie and addressing individual
+ * histories by indices in that trie. We also use trie to compute the final
+ * tag values (instead of storing tags in registers at each step).
+ */
+
static void make_step(pzsimctx_t &, uint32_t);
static void make_final_step(pzsimctx_t &);
+template<typename ctx_t> static int32_t zprecedence1(ctx_t &ctx
+ , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
+template<typename ctx_t> static int32_t zprecedence2(ctx_t &ctx
+ , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
+
+// we *do* want this to be inlined
+static inline uint32_t get_step(const zhistory_t &hist, int32_t idx);
+static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx);
+static inline void closure_posix(pzsimctx_t &ctx);
int regexec_nfa_posix_trie(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
}
}
+template<typename ctx_t>
+int32_t zhistory_t::precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
+ , int32_t &prec1, int32_t &prec2)
+{
+ return zprecedence1(ctx, x.thist, y.thist, prec1, prec2);
+}
+
+// see note [lazy computation and caching of precedence]
+template<typename ctx_t>
+int32_t zprecedence1(ctx_t &ctx, hidx_t idx1, hidx_t idx2
+ , int32_t &prec1, int32_t &prec2)
+{
+ zhistory_t::cache_t &cache = ctx.history.cache;
+ int32_t prec = 0;
+
+ // use the same cache entry for (x, y) and (y, x)
+ uint32_t k1 = static_cast<uint32_t>(idx1), k2 = static_cast<uint32_t>(idx2);
+ bool invert = k2 < k1;
+ if (invert) std::swap(k1, k2);
+ const uint64_t key = (static_cast<uint64_t>(k1) << 32) | k2;
+
+ zhistory_t::cache_t::const_iterator i = cache.find(key);
+ if (i != cache.end()) {
+ // use previously computed precedence values from cache
+ const zhistory_t::cache_entry_t &val = i->second;
+ prec1 = val.prec1;
+ prec2 = val.prec2;
+ prec = val.prec;
+ if (invert) {
+ std::swap(prec1, prec2);
+ prec = -prec;
+ }
+ }
+ else {
+ // compute precedence values and put them into cache
+ prec = zprecedence2(ctx, idx1, idx2, prec1, prec2);
+ zhistory_t::cache_entry_t val = {prec1, prec2, prec};
+ if (invert) {
+ std::swap(val.prec1, val.prec2);
+ val.prec = -val.prec;
+ }
+ cache.insert(std::make_pair(key, val));
+ }
+
+ return prec;
+}
+
+// see note [lazy computation and caching of precedence]
+template<typename ctx_t>
+int32_t zprecedence2(ctx_t &ctx, hidx_t idx1, hidx_t idx2
+ , int32_t &prec1, int32_t &prec2)
+{
+ if (idx1 == idx2) {
+ prec1 = prec2 = MAX_RHO;
+ return 0;
+ }
+
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ zhistory_t &hist = ctx.history;
+
+ int32_t prec = 0;
+ prec1 = prec2 = MAX_RHO;
+
+ int32_t i1 = idx1, i2 = idx2;
+
+ const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2);
+
+ uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2);
+ const uint32_t s = std::max(s1, s2);
+
+ const bool fork_frame = o1 == o2 && s1 == s2;
+
+ tag_info_t info1 = NOINFO, info2 = NOINFO;
+ for (; i1 != i2 && (s1 >= s || s2 >= s);) {
+ if (s1 >= s && (i1 > i2 || s2 < s)) {
+ const zhistory_t::node_t &n = hist.node(i1);
+ info1 = n.info;
+ prec1 = std::min(prec1, tags[info1.idx].height);
+ i1 = n.pred;
+ s1 = get_step(hist, i1);
+ }
+ else {
+ const zhistory_t::node_t &n = hist.node(i2);
+ info2 = n.info;
+ prec2 = std::min(prec2, tags[info2.idx].height);
+ i2 = n.pred;
+ s2 = get_step(hist, i2);
+ }
+ }
+ if (!fork_frame) {
+ // recurse into previous steps (via cache)
+ int32_t p1, p2;
+ prec = zprecedence1(ctx, i1, i2, p1, p2);
+ prec1 = std::min(prec1, p1);
+ prec2 = std::min(prec2, p2);
+ }
+ else if (i1 != HROOT) {
+ const int32_t h = tags[hist.node(i1).info.idx].height;
+ prec1 = std::min(prec1, h);
+ prec2 = std::min(prec2, h);
+ }
+
+ // longest precedence
+ if (prec1 > prec2) return -1;
+ if (prec1 < prec2) return 1;
+
+ // leftmost precedence
+ return !fork_frame ? prec
+ : leftprec(info1, info2, i1 == idx1, i2 == idx2);
+}
+
+uint32_t get_step(const zhistory_t &hist, int32_t idx)
+{
+ return idx == HROOT ? 0 : hist.node(idx).step;
+}
+
+uint32_t get_orig(const zhistory_t &hist, int32_t idx)
+{
+ return idx == HROOT ? 0 : hist.node(idx).orig;
+}
+
} // namespace libre2c
template<> void init_gor1<libre2c::pzsimctx_t>(libre2c::pzsimctx_t &ctx)
namespace re2c {
-/* note [lazy computation and caching of precedence]
- *
- * Eagerly computing precedence values on each step for each pair of closure
- * states is a waste of time: most of these values are not needed, because
- * RE may be unambigous, or the given input string may be unambigous, or even
- * if there is ambiguity, it may take only a few comparisons to resolve. All
- * the rest is wasted effort.
- *
- * We can avoid it by delaying precedence computation until necessary, and
- * then unwinding all the steps backwards, computing precedence for each step
- * and caching the computed values (so that the same pair of histories is not
- * compared twice). It is still the same incremental comparison as with
- * precedence matrices: we compare step by step, going from the fork frame to
- * the join frame. The result of comparison on each step is folded to a triple
- * of numbers and recorded in cache. It is important that we do record each
- * step, not just the final step, because the next pair of ambiguous histories
- * may unwind to the same pair of prefixes that was compared before.
- *
- * For all this to work, it is necessary that we are able to store all history
- * until the end, because at any step we might need to unwind an arbitrary
- * number of steps back. We also need to address individual subhistories
- * efficiently in order to use them as keys in the cache. All this is achieved
- * by storing history in the form of a trie and addressing individual
- * histories by indices in that trie. We also use trie to compute the final
- * tag values (instead of storing tags in registers at each step).
- */
-
// maximum 29-bit (we have 30 bits, but highest must be non-negative)
static const int32_t MAX_RHO = 0x1fffFFFF;
-template<typename ctx_t> static int32_t zprecedence1(ctx_t &ctx
- , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
-template<typename ctx_t> static int32_t zprecedence2(ctx_t &ctx
- , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
-
// we *do* want this to be inlined
static inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2);
static inline int32_t unpack_longest(int32_t packed);
static inline int32_t unpack_leftmost(int32_t packed);
static inline int32_t pack(int32_t longest, int32_t leftmost);
-static inline uint32_t get_step(const zhistory_t &hist, int32_t idx);
-static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx);
template<typename ctx_t>
int32_t phistory_t::precedence(ctx_t &ctx
}
}
-template<typename ctx_t>
-int32_t zhistory_t::precedence(ctx_t &ctx
- , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
- , int32_t &prec1, int32_t &prec2)
-{
- return zprecedence1(ctx, x.thist, y.thist, prec1, prec2);
-}
-
-// see note [lazy computation and caching of precedence]
-template<typename ctx_t>
-int32_t zprecedence1(ctx_t &ctx, hidx_t idx1, hidx_t idx2
- , int32_t &prec1, int32_t &prec2)
-{
- zhistory_t::cache_t &cache = ctx.history.cache;
- int32_t prec = 0;
-
- // use the same cache entry for (x, y) and (y, x)
- uint32_t k1 = static_cast<uint32_t>(idx1), k2 = static_cast<uint32_t>(idx2);
- bool invert = k2 < k1;
- if (invert) std::swap(k1, k2);
- const uint64_t key = (static_cast<uint64_t>(k1) << 32) | k2;
-
- zhistory_t::cache_t::const_iterator i = cache.find(key);
- if (i != cache.end()) {
- // use previously computed precedence values from cache
- const zhistory_t::cache_entry_t &val = i->second;
- prec1 = val.prec1;
- prec2 = val.prec2;
- prec = val.prec;
- if (invert) {
- std::swap(prec1, prec2);
- prec = -prec;
- }
- }
- else {
- // compute precedence values and put them into cache
- prec = zprecedence2(ctx, idx1, idx2, prec1, prec2);
- zhistory_t::cache_entry_t val = {prec1, prec2, prec};
- if (invert) {
- std::swap(val.prec1, val.prec2);
- val.prec = -val.prec;
- }
- cache.insert(std::make_pair(key, val));
- }
-
- return prec;
-}
-
-// see note [lazy computation and caching of precedence]
-template<typename ctx_t>
-int32_t zprecedence2(ctx_t &ctx, hidx_t idx1, hidx_t idx2
- , int32_t &prec1, int32_t &prec2)
-{
- if (idx1 == idx2) {
- prec1 = prec2 = MAX_RHO;
- return 0;
- }
-
- const std::vector<Tag> &tags = ctx.nfa.tags;
- zhistory_t &hist = ctx.history;
-
- int32_t prec = 0;
- prec1 = prec2 = MAX_RHO;
-
- int32_t i1 = idx1, i2 = idx2;
-
- const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2);
-
- uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2);
- const uint32_t s = std::max(s1, s2);
-
- const bool fork_frame = o1 == o2 && s1 == s2;
-
- tag_info_t info1 = NOINFO, info2 = NOINFO;
- for (; i1 != i2 && (s1 >= s || s2 >= s);) {
- if (s1 >= s && (i1 > i2 || s2 < s)) {
- const zhistory_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- s1 = get_step(hist, i1);
- }
- else {
- const zhistory_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- s2 = get_step(hist, i2);
- }
- }
- if (!fork_frame) {
- // recurse into previous steps (via cache)
- int32_t p1, p2;
- prec = zprecedence1(ctx, i1, i2, p1, p2);
- prec1 = std::min(prec1, p1);
- prec2 = std::min(prec2, p2);
- }
- else if (i1 != HROOT) {
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- return !fork_frame ? prec
- : leftprec(info1, info2, i1 == idx1, i2 == idx2);
-}
-
int32_t unpack_longest(int32_t packed)
{
// take lower 30 bits and sign-extend
return packed;
}
-uint32_t get_step(const zhistory_t &hist, int32_t idx)
-{
- return idx == HROOT ? 0 : hist.node(idx).step;
-}
-
-uint32_t get_orig(const zhistory_t &hist, int32_t idx)
-{
- return idx == HROOT ? 0 : hist.node(idx).orig;
-}
-
} // namespace re2c
#endif // _RE2C_DFA_POSIX_PRECEDENCE_
FORBID_COPY(lhistory_t);
};
-// tag history for lazy disambiguation (both POSIX and leftmost greedy)
-struct zhistory_t
-{
- struct node_t {
- tag_info_t info;
- hidx_t pred;
- uint32_t orig;
- uint32_t step;
-
- inline node_t(tag_info_t info, hidx_t pred, uint32_t orig, uint32_t step)
- : info(info), pred(pred), orig(orig), step(step) {}
- };
-
- struct cache_entry_t
- {
- int32_t prec1;
- int32_t prec2;
- int32_t prec;
- };
- typedef std::map<uint64_t, cache_entry_t> cache_t;
-
- std::vector<node_t> nodes;
- cache_t cache;
-
- inline zhistory_t(): nodes(), cache() { init(); }
- inline void init();
- inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
- inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
- template<typename ctx_t> inline hidx_t link(ctx_t &ctx
- , const typename ctx_t::conf_t &conf);
- template<typename ctx_t> static int32_t precedence(ctx_t &ctx
- , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
- , int32_t &prec1, int32_t &prec2);
- FORBID_COPY(zhistory_t);
-};
-
void phistory_t::init()
{
nodes.clear();
nodes.push_back(node_t(NOINFO, -1));
}
-void zhistory_t::init()
-{
- nodes.clear();
- nodes.push_back(node_t(NOINFO, -1, 0, 0));
- cache.clear();
-}
-
void phistory_t::detach()
{
// don't delete existing tree, just detach it from root
return i;
}
-template<typename ctx_t>
-hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf)
-{
- const int32_t i = static_cast<int32_t>(nodes.size());
- nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step));
- return i;
-}
-
template<typename history_t>
tagver_t last(const history_t &h, hidx_t i, size_t t)
{
namespace re2c {
-template<typename type_t> void strrreplace(
- std::string &s,
- const std::string &s1,
- const type_t &v)
+template<typename type_t>
+void strrreplace(std::string &s, const std::string &s1, const type_t &v)
{
std::ostringstream sv;
sv << v;
}
template<typename T>
-static std::string to_string(const T &v)
+std::string to_string(const T &v)
{
std::ostringstream s;
s << v;