From f33c3d99ab1a888b7c9aba318af6b4677a7e5192 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 27 Feb 2019 10:50:07 +0000 Subject: [PATCH] libre2c: use a more efficient algorithm for computing POSIX precedence matrix. The old alrorithm was straightforward: it computed precedence for each pair of core closure items. This has worst-case cubic complexity in the size of TNFA, because closure can have O(N) states and tag history length can be also O(N). One example is ((a?){1,1000})*. The new algorithm avoids that by computing parecedence for all pairs of core items in one traversal of the shortest path tree constructed by closure. It avoids traversing deadend branches (those that were cancelled by search because a better path was found) by marking used nodes of the tree from each leaf down to the root. The new algorithm has worst-case quadratic behaviour. It can be slightly slower for simple cases when the number of core items is low and histories are short. --- re2c/lib/regex.h | 1 + re2c/lib/regex_impl.h | 84 ++++++- re2c/lib/regexec.cc | 24 +- re2c/lib/regexec_nfa_leftmost.cc | 4 +- re2c/lib/regexec_nfa_posix.cc | 351 ++++++++++++++++++++++------- re2c/lib/regexec_nfa_posix_trie.cc | 10 +- re2c/lib/test.cc | 19 +- 7 files changed, 389 insertions(+), 104 deletions(-) diff --git a/re2c/lib/regex.h b/re2c/lib/regex.h index 1b9c1f01..2849dae3 100644 --- a/re2c/lib/regex.h +++ b/re2c/lib/regex.h @@ -38,6 +38,7 @@ static const int REG_NFA = 1u << 6; static const int REG_LEFTMOST = 1u << 7; static const int REG_TRIE = 1u << 8; static const int REG_GTOP = 1u << 9; +static const int REG_SLOWPREC = 1u << 10; struct regex_t { diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index 3cf9ce2f..f5fb9c7a 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -16,22 +16,54 @@ namespace libre2c { typedef std::vector tag_path_t; +const tag_info_t NOINFO = {0x3fffFFFF, 0}; + +static const uint32_t NONFIN = ~0u; +static const uint32_t USED = NONFIN - 1; + struct history_t { + struct arc_t { + int32_t node; + int32_t prev; + int32_t next; + + inline arc_t(int32_t node, int32_t prev, int32_t next) + : node(node), prev(prev), next(next) {} + }; + struct node_t { - int32_t pred; - uint32_t step; tag_info_t info; - uint32_t orig; + int32_t pred; + union { + int32_t last; + uint32_t step; + }; + union { + int32_t next; + uint32_t orig; + }; + uint32_t finidx; + + inline node_t(tag_info_t info, int32_t pred) + : info(info), pred(pred), last(-1), next(-1), finidx(NONFIN) {} + inline node_t(tag_info_t info, int32_t pred, uint32_t step, uint32_t orig) + : info(info), pred(pred), step(step), orig(orig), finidx(NONFIN) {} }; static const int32_t ROOT; std::vector nodes; + std::vector arcs; explicit history_t(size_t nstates); - inline const node_t &at(int32_t i) const { return nodes[static_cast(i)]; } + inline void init(); + inline node_t &node(int32_t i) { return nodes[static_cast(i)]; } + inline const node_t &node(int32_t i) const { return nodes[static_cast(i)]; } + inline arc_t &arc(int32_t i) { return arcs[static_cast(i)]; } + inline const arc_t &arc(int32_t i) const { return arcs[static_cast(i)]; } inline int32_t push(int32_t i, uint32_t step, tag_info_t info, uint32_t orig); + inline int32_t push(tag_info_t info, int32_t idx); FORBID_COPY(history_t); }; @@ -46,6 +78,14 @@ struct conf_t : state(s), origin(o), thist(h) {} }; +struct histleaf_t +{ + uint32_t coreid; + uint32_t origin; + int32_t hidx; + int32_t height; +}; + struct ran_or_fin_t { inline bool operator()(const conf_t &c); @@ -93,6 +133,10 @@ struct simctx_t int32_t *prectbl1; int32_t *prectbl2; cache_t cache; + std::vector histlevel; + std::vector sortcores; + std::vector fincount; + std::vector worklist; std::vector gor1_topsort; std::vector gor1_linear; @@ -113,11 +157,37 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string, size_t nmatc int regexec_nfa_leftmost(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); +void history_t::init() +{ + nodes.clear(); + arcs.clear(); + nodes.push_back(node_t(NOINFO, -1)); +} + +int32_t history_t::push(tag_info_t info, int32_t idx) +{ + const int32_t i = static_cast(nodes.size()); + if (idx != -1) { + node_t &n = node(idx); + const int32_t a = static_cast(arcs.size()); + arcs.push_back(arc_t(i, n.last, -1)); + if (n.next == -1) { + n.next = a; + } + else { + arc(n.last).next = a; + } + n.last = a; + } + nodes.push_back(node_t(info, idx)); + return i; +} + int32_t history_t::push(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) { - const node_t x = {idx, step, info, orig}; - nodes.push_back(x); - return static_cast(nodes.size() - 1); + const int32_t i = static_cast(nodes.size()); + nodes.push_back(node_t(info, idx, step, orig)); + return i; } bool ran_or_fin_t::operator()(const conf_t &c) diff --git a/re2c/lib/regexec.cc b/re2c/lib/regexec.cc index 7770e2c3..2fec52c0 100644 --- a/re2c/lib/regexec.cc +++ b/re2c/lib/regexec.cc @@ -29,7 +29,7 @@ int regexec(const regex_t *preg, const char *string, size_t nmatch, namespace re2c { namespace libre2c { -const int32_t history_t::ROOT = -1; +const int32_t history_t::ROOT = 0; int finalize(const simctx_t &ctx, const char *string, size_t nmatch, regmatch_t pmatch[]) @@ -48,7 +48,7 @@ int finalize(const simctx_t &ctx, const char *string, size_t nmatch, memset(done, 0, ctx.nsub * sizeof(bool)); for (int32_t i = ctx.hidx; todo > 0 && i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.at(i); + const history_t::node_t &n = ctx.hist.node(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; if (!fictive(tag) && t < nmatch * 2 && !done[t]) { @@ -88,6 +88,10 @@ simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags) , prectbl1(NULL) , prectbl2(NULL) , cache() + , histlevel() + , sortcores() + , fincount() + , worklist() , gor1_topsort() , gor1_linear() , gtop_heap_storage() @@ -111,6 +115,10 @@ simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags) if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) { prectbl1 = new int32_t[ncores * ncores]; prectbl2 = new int32_t[ncores * ncores]; + histlevel.reserve(ncores); + sortcores.reserve(ncores); + fincount.resize(ncores + 1); + worklist.reserve(nstates); } if (flags & REG_GTOP) { @@ -140,21 +148,27 @@ void init(simctx_t &ctx, const char *string) { ctx.reach.clear(); ctx.state.clear(); - ctx.hist.nodes.clear(); + ctx.hist.init(); ctx.hidx = history_t::ROOT; ctx.step = 0; ctx.rule = Rule::NONE; ctx.cursor = ctx.marker = string; ctx.cache.clear(); + ctx.histlevel.clear(); + ctx.sortcores.clear(); + DASSERT(ctx.worklist.empty()); DASSERT(ctx.gor1_topsort.empty()); DASSERT(ctx.gor1_linear.empty()); DASSERT(ctx.gtop_heap.empty()); } -history_t::history_t(size_t nstates) +history_t::history_t(size_t /* nstates */) : nodes() + , arcs() { - nodes.reserve(nstates); + // nodes.reserve(nstates); + // arcs.reserve(nstates); + init(); } } // namespace libre2c diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index b1c8d6d9..3e24ce75 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -90,7 +90,7 @@ void reach_on_symbol(simctx_t &ctx, uint32_t sym) } std::swap(ctx.offsets1, ctx.offsets2); - ctx.hist.nodes.clear(); + ctx.hist.init(); } void closure_leftmost(simctx_t &ctx) @@ -148,7 +148,7 @@ void update_offsets(simctx_t &ctx, const conf_t &c) memset(done, 0, nsub * sizeof(bool)); for (int32_t i = c.thist; i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.at(i); + const history_t::node_t &n = ctx.hist.node(i); const size_t t = n.info.idx; if (!done[t]) { done[t] = true; diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index 6ee281f7..e80a40f5 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -29,12 +29,14 @@ static void make_one_step(simctx_t &, uint32_t); static void make_final_step(simctx_t &); static void update_offsets(simctx_t &ctx, const conf_t &c); static void update_prectbl(simctx_t &); +static void update_prectbl_naive(simctx_t &ctx); static int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y, int32_t &prec1, int32_t &prec2); // we *do* want these to be inlined static inline bool scan(simctx_t &ctx, nfa_state_t *q, bool all); static inline bool relax_gor1(simctx_t &, const conf_t &); static inline void relax_gtop(simctx_t &, const conf_t &); +static inline int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2); int regexec_nfa_posix(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int eflags) @@ -116,10 +118,14 @@ void make_one_step(simctx_t &ctx, uint32_t sym) state.resize(j); std::swap(ctx.offsets1, ctx.offsets2); - update_prectbl(ctx); - - ctx.hist.nodes.clear(); + if (!(ctx.flags & REG_SLOWPREC)) { + update_prectbl(ctx); + } + else { + update_prectbl_naive(ctx); + } + ctx.hist.init(); ++ctx.step; } @@ -228,7 +234,7 @@ bool scan(simctx_t &ctx, nfa_state_t *q, bool all) case nfa_state_t::TAG: if (q->arcidx == 0) { any |= relax_gor1(ctx, conf_t(q->tag.out, o - , ctx.hist.push(h, ctx.step, q->tag.info, o))); + , ctx.hist.push(q->tag.info, h))); ++q->arcidx; } break; @@ -300,7 +306,7 @@ void closure_posix(simctx_t &ctx) break; case nfa_state_t::TAG: relax_gtop(ctx, conf_t(q->tag.out, o - , ctx.hist.push(h, ctx.step, q->tag.info, o))); + , ctx.hist.push(q->tag.info, h))); break; default: break; @@ -333,61 +339,6 @@ void relax_gtop(simctx_t &ctx, const conf_t &c) } } -void update_offsets(simctx_t &ctx, const conf_t &c) -{ - const size_t nsub = ctx.nsub; - bool *done = ctx.done; - regoff_t *o; - const std::vector &tags = ctx.nfa->tags; - nfa_state_t *s = c.state; - - if (s->type == nfa_state_t::FIN) { - ctx.marker = ctx.cursor; - ctx.rule = 0; - o = ctx.offsets3; - } - else { - o = ctx.offsets1 + s->coreid * nsub; - } - - memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); - memset(done, 0, nsub * sizeof(bool)); - - for (int32_t i = c.thist; i != history_t::ROOT; ) { - const history_t::node_t &n = ctx.hist.at(i); - const Tag &tag = tags[n.info.idx]; - const size_t t = tag.ncap; - if (!fictive(tag) && !done[t]) { - done[t] = true; - o[t] = n.info.neg ? -1 : static_cast(ctx.step); - } - i = n.pred; - } -} - -void update_prectbl(simctx_t &ctx) -{ - cconfiter_t b = ctx.state.begin(), e = ctx.state.end(), c, d; - const size_t ncores = ctx.nfa->ncores; - int32_t *ptbl = ctx.prectbl2; - const int32_t p0 = pack(MAX_RHO, 0); - - for (c = b; c != e; ++c) { - nfa_state_t *s = c->state; - DASSERT (s->type == nfa_state_t::RAN); - ptbl[s->coreid * ncores + s->coreid] = p0; - - for (d = c + 1; d != e; ++d) { - nfa_state_t *q = d->state; - int32_t prec1, prec2; - int32_t prec = precedence(ctx, *c, *d, prec1, prec2); - ptbl[s->coreid * ncores + q->coreid] = pack(prec1, prec); - ptbl[q->coreid * ncores + s->coreid] = pack(prec2, -prec); - } - } - std::swap(ctx.prectbl1, ctx.prectbl2); -} - int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y , int32_t &prec1, int32_t &prec2) { @@ -415,13 +366,13 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y int32_t i1 = idx1, i2 = idx2; for (; i1 != i2; ) { if (i1 > i2) { - const history_t::node_t &n = hist.at(i1); + const history_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; } else { - const history_t::node_t &n = hist.at(i2); + const history_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; @@ -429,7 +380,7 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y } if (i1 != history_t::ROOT) { DASSERT(fork_frame); - const int32_t h = tags[hist.at(i1).info.idx].height; + const int32_t h = tags[hist.node(i1).info.idx].height; prec1 = std::min(prec1, h); prec2 = std::min(prec2, h); } @@ -439,37 +390,269 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y if (prec1 < prec2) return 1; // leftmost precedence - if (fork_frame) { - // equal => not less - if (i1 == idx1 && i2 == idx2) return 0; + return fork_frame + ? leftprec(ctx, info1, info2, i1 == idx1, i2 == idx2) + : unpack_leftmost(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]); +} + +int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2) +{ + // equal => not less + if (last1 && last2) return 0; - // shorter => less - if (i1 == idx1) return -1; - if (i2 == idx2) return 1; + // shorter => less + if (last1) return -1; + if (last2) return 1; - const uint32_t tag1 = info1.idx, tag2 = info2.idx; - const bool neg1 = info1.neg, neg2 = info2.neg; + const uint32_t tag1 = info1.idx, tag2 = info2.idx; + const bool neg1 = info1.neg, neg2 = info2.neg; - // can't be both closing - DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1)); + // can't be both closing + DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1)); - // closing vs opening: closing wins - if (tag1 % 2 == 1) return -1; - if (tag2 % 2 == 1) return 1; + // closing vs opening: closing wins + if (tag1 % 2 == 1) return -1; + if (tag2 % 2 == 1) return 1; + + // can't be both negative + DASSERT(!(neg1 && neg2)); + + // positive vs negative: positive wins + if (neg1) return 1; + if (neg2) return -1; + + DASSERT(false); + return 0; +} - // can't be both negative - DASSERT(!(neg1 && neg2)); +void update_offsets(simctx_t &ctx, const conf_t &c) +{ + const size_t nsub = ctx.nsub; + regoff_t *o; + const std::vector &tags = ctx.nfa->tags; + nfa_state_t *s = c.state; + bool *done = ctx.done; - // positive vs negative: positive wins - if (neg1) return 1; - if (neg2) return -1; + if (s->type == nfa_state_t::FIN) { + ctx.marker = ctx.cursor; + ctx.rule = 0; + o = ctx.offsets3; } else { - return unpack_leftmost(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]); + o = ctx.offsets1 + s->coreid * nsub; } - DASSERT(false); // unreachable - return 0; + memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); + memset(done, 0, nsub * sizeof(bool)); + + for (int32_t i = c.thist; i != history_t::ROOT; ) { + const history_t::node_t &n = ctx.hist.node(i); + const Tag &tag = tags[n.info.idx]; + const size_t t = tag.ncap; + regoff_t *off = o + t; + if (!fictive(tag) && !done[t]) { + done[t] = true; + *off = n.info.neg ? -1 : static_cast(ctx.step); + } + i = n.pred; + } +} + +void update_prectbl(simctx_t &ctx) +{ + const confset_t &state = ctx.state; + const std::vector &tags = ctx.nfa->tags; + std::vector &sortcores = ctx.sortcores; + std::vector &fcount = ctx.fincount; + std::vector stack = ctx.worklist; + std::vector &level = ctx.histlevel; + std::vector::reverse_iterator li, lj, lk, le; + history_t &hist = ctx.hist; + const size_t ncores = ctx.nfa->ncores; + int32_t *oldtbl = ctx.prectbl1, *newtbl = ctx.prectbl2; + + // Group core configurations by their history tree index, so that later + // while traversing the tree we will know at once which configurations + // (if any) are bound to the given tree node. We use counting sort, which + // requires additional memory, but is fast and conveniently creates an + // array of boundaries in the sorted configuration array. + uint32_t maxfin = 0; + for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) { + history_t::node_t &n = hist.node(c->thist); + if (n.finidx >= USED) { + n.finidx = maxfin++; + fcount[n.finidx] = 0; + + // mark all nodes down to root as used (unless marked already) + for (int32_t i = n.pred; i >= history_t::ROOT; ) { + history_t::node_t &m = hist.node(i); + if (m.finidx <= USED) break; + m.finidx = USED; + i = m.pred; + } + } + ++fcount[n.finidx]; + } + fcount[maxfin] = 0; + for (size_t i = 1; i <= maxfin; ++i) { + fcount[i] += fcount[i - 1]; + } + sortcores.resize(state.size()); + for (rcconfiter_t c = state.rbegin(), e = state.rend(); c != e; ++c) { + sortcores[--fcount[hist.node(c->thist).finidx]] = &*c; + } + + // Depth-first traversal of the history tree. During traversal we grow + // an array of items (one item per core configuration). Items are added + // in tree nodes that have core configurations associated with them. + // Each item represents one history. Items have immutable part (core ID, + // origin) and mutable part (current minimal height, current tree index) + // that changes as we return down the tree. + level.clear(); + stack.push_back(0); + while (!stack.empty()) { + const int32_t n = stack.back(); + history_t::node_t &node = hist.node(n); + const uint32_t fidx = node.finidx; + + if (fidx == NONFIN) { + // aborted branch of search tree, don't waste time + stack.pop_back(); + continue; + } + + if (node.next != -1) { + // start or continue visiting subtrees rooted at this node + const history_t::arc_t &arc = hist.arc(node.next); + stack.push_back(arc.node); + node.next = arc.next; + continue; + } + + // all subtrees visited, it's time to process this node + const int32_t h = n == 0 ? MAX_RHO : tags[node.info.idx].height; + li = level.rbegin(); + le = level.rend(); + + if (fidx < USED) { + // this node has leaf configurations, add them to level + for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) { + const conf_t *c = sortcores[k]; + const histleaf_t l = {c->state->coreid, c->origin, history_t::ROOT, h}; + level.push_back(l); + } + + // compute precedence for newly added configurations + const int32_t p0 = pack(h, 0); + for (lj = level.rbegin(); lj != li; ++lj) { + for (lk = lj; lk != li; ++lk) { + const uint32_t cj = lj->coreid, ck = lk->coreid; + const uint32_t oj = lj->origin, ok = lk->origin; + const bool fork = n != 0 || oj == ok; + if (fork) { + newtbl[cj * ncores + ck] = p0; + newtbl[ck * ncores + cj] = p0; + } + else { + newtbl[cj * ncores + ck] = oldtbl[oj * ncores + ok]; + newtbl[ck * ncores + cj] = oldtbl[ok * ncores + oj]; + } + } + } + } + + // Each subtree appended a sequence of items to level. We can find + // sequence boundaries by looking at tree index of each item: it is + // equal to tree index of the corresponding subtree (except for the + // leaf items added at this node; but we know where they start). + + // We must compute precedence for each pair of items from different + // sequences (including leaf items added at this node), but not within + // sequence boundaries: those histories fork higher up the subtree; + // their precedence has already been computed and must not be touched. + + for (int32_t a = node.last; a != -1; ) { + const history_t::arc_t &arc = hist.arc(a); + a = arc.prev; + + // for all the items of this subtree + for (lk = li; li != le && li->hidx == arc.node; ++li) { + + // update height of each item coming from subtree + li->height = std::min(li->height, h); + + // for all the level items to the right of this subtree + for (lj = level.rbegin(); lj != lk; ++lj) { + + const uint32_t ci = li->coreid, cj = lj->coreid; + const uint32_t oi = li->origin, oj = lj->origin; + const bool fork = n != 0 || oi == oj; + int32_t p1 = li->height, p2 = lj->height, p; + + if (!fork) { + p1 = std::min(p1, unpack_longest(oldtbl[oi * ncores + oj])); + p2 = std::min(p2, unpack_longest(oldtbl[oj * ncores + oi])); + } + + if (p1 > p2) { + p = -1; + } + else if (p1 < p2) { + p = 1; + } + else if (fork) { + const tag_info_t t1 = hist.node(li->hidx).info; + const tag_info_t t2 = hist.node(lj->hidx).info; + p = leftprec(ctx, t1, t2, t1 == NOINFO, t2 == NOINFO); + } + else { + p = unpack_leftmost(oldtbl[oi * ncores + oj]); + } + + newtbl[ci * ncores + cj] = pack(p1, p); + newtbl[cj * ncores + ci] = pack(p2, -p); + } + } + } + + // finally, downgrade tree index of all subtree items, making their + // origins indistinguishable from each other for the previous level + for (lj = level.rbegin(); lj != li; ++lj) { + lj->hidx = n; + } + + stack.pop_back(); + } + + std::swap(ctx.prectbl1, ctx.prectbl2); +} + +// Old naive algorithm that has cubic complexity in the size of TNFA. +// Example that exhibits cubic behaviour is ((a?){1,N})*. In this example +// closure has O(N) states, and the compared histories have O(N) length. +void update_prectbl_naive(simctx_t &ctx) +{ + const confset_t &state = ctx.state; + const size_t ncores = ctx.nfa->ncores; + int32_t *newtbl = ctx.prectbl2; + + const int32_t p0 = pack(MAX_RHO, 0); + + for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) { + nfa_state_t *s = c->state; + DASSERT (s->type == nfa_state_t::RAN); + newtbl[s->coreid * ncores + s->coreid] = p0; + + for (cconfiter_t d = c + 1; d != e; ++d) { + nfa_state_t *q = d->state; + int32_t prec1, prec2; + int32_t prec = precedence(ctx, *c, *d, prec1, prec2); + newtbl[s->coreid * ncores + q->coreid] = pack(prec1, prec); + newtbl[q->coreid * ncores + s->coreid] = pack(prec2, -prec); + } + } + + std::swap(ctx.prectbl1, ctx.prectbl2); } } // namespace libre2c diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 3a05bead..6a0167a8 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -260,14 +260,14 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 tag_info_t info1, info2; for (; i1 != i2 && (s1 >= s || s2 >= s);) { if (s1 >= s && (i1 > i2 || s2 < s)) { - const history_t::node_t &n = hist.at(i1); + const history_t::node_t &n = hist.node(i1); info1 = n.info; prec1 = std::min(prec1, tags[info1.idx].height); i1 = n.pred; s1 = get_step(hist, i1); } else { - const history_t::node_t &n = hist.at(i2); + const history_t::node_t &n = hist.node(i2); info2 = n.info; prec2 = std::min(prec2, tags[info2.idx].height); i2 = n.pred; @@ -282,7 +282,7 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 prec2 = std::min(prec2, p2); } else if (i1 != history_t::ROOT) { - const int32_t h = tags[hist.at(i1).info.idx].height; + const int32_t h = tags[hist.node(i1).info.idx].height; prec1 = std::min(prec1, h); prec2 = std::min(prec2, h); } @@ -327,12 +327,12 @@ int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 uint32_t get_step(const history_t &hist, int32_t idx) { - return idx == history_t::ROOT ? 0 : hist.at(idx).step; + return idx == history_t::ROOT ? 0 : hist.node(idx).step; } uint32_t get_orig(const history_t &hist, int32_t idx) { - return idx == history_t::ROOT ? 0 : hist.at(idx).orig; + return idx == history_t::ROOT ? 0 : hist.node(idx).orig; } } // namespace libre2c diff --git a/re2c/lib/test.cc b/re2c/lib/test.cc index 55bbdd86..6c1dd7c8 100644 --- a/re2c/lib/test.cc +++ b/re2c/lib/test.cc @@ -9,6 +9,9 @@ static int test(const char *pattern, const char *string , size_t nmatch, const regoff_t *submatch, int flags) { + static uint32_t total = 0; + static uint32_t failed = 0; + regex_t re; regmatch_t *pmatch = new regmatch_t[nmatch]; const char *prefix = flags & REG_NFA ? "NFA" : "DFA"; @@ -67,6 +70,12 @@ end: regfree(&re); delete[] pmatch; + ++total; + if (result != 0) { + ++failed; + fprintf(stderr, "failed %u of %u\n", failed, total); + } + return result; } @@ -501,6 +510,10 @@ static int test_all_posix(int flags) T4("(ab|a)(c|bcd)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); T4("(ab|a)(bcd|c)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); + if (!(flags & REG_SLOWPREC)) { + T3("((a?){1,1000})*", "aaaa", 0,4, 0,4, 3,4); + } + return e; } @@ -916,6 +929,9 @@ static int test_all_leftmost(int flags) T4("(ab|a)(c|bcd)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); T4("(ab|a)(bcd|c)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4); + // other + T3("((a?){1,1000})*", "aaaa", 0,4, 0,4, 4,4); + return e; } @@ -934,10 +950,11 @@ int main() { int e = 0; - e |= test_all_posix(0); +// e |= test_all_posix(0); e |= test_all_posix(REG_NFA); e |= test_all_posix(REG_NFA | REG_GTOP); e |= test_all_posix(REG_NFA | REG_TRIE); + e |= test_all_posix(REG_NFA | REG_SLOWPREC); e |= test_all_leftmost(REG_NFA | REG_LEFTMOST); e |= test_all_leftmost(REG_NFA | REG_LEFTMOST | REG_TRIE); -- 2.50.1