dfa_t *dfa = NULL;
if (cflags & REG_NFA) {
if ((cflags & REG_TRIE) && (cflags & REG_LEFTMOST)) {
- preg->simctx = new libre2c::lzctx_t(*nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::lzsimctx_t(*nfa, preg->re_nsub, cflags);
}
else if (cflags & REG_TRIE) {
- preg->simctx = new libre2c::pzctx_t(*nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::pzsimctx_t(*nfa, preg->re_nsub, cflags);
}
else if (cflags & REG_LEFTMOST) {
- preg->simctx = new libre2c::lctx_t(*nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::lsimctx_t(*nfa, preg->re_nsub, cflags);
}
else {
- preg->simctx = new libre2c::pctx_t(*nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::psimctx_t(*nfa, preg->re_nsub, cflags);
}
}
else {
inline bool operator()(const conf_t &c);
};
-struct cache_entry_t
-{
- int32_t prec1;
- int32_t prec2;
- int32_t prec;
-};
-
-typedef std::map<uint64_t, cache_entry_t> cache_t;
-
typedef std::vector<conf_t> confset_t;
typedef confset_t::iterator confiter_t;
typedef confset_t::const_iterator cconfiter_t;
std::vector<uint32_t> sortcores;
std::vector<uint32_t> fincount;
std::vector<int32_t> worklist;
- cache_t cache;
confset_t reach;
confset_t state;
FORBID_COPY(simctx_t);
};
-typedef simctx_t<POSIX, STRICT> pctx_t;
-typedef simctx_t<LEFTMOST, STRICT> lctx_t;
-typedef simctx_t<POSIX, LAZY> pzctx_t;
-typedef simctx_t<LEFTMOST, LAZY> lzctx_t;
+typedef simctx_t<POSIX, STRICT> psimctx_t;
+typedef simctx_t<LEFTMOST, STRICT> lsimctx_t;
+typedef simctx_t<POSIX, LAZY> pzsimctx_t;
+typedef simctx_t<LEFTMOST, LAZY> lzsimctx_t;
int regexec_dfa(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
int regexec_nfa_posix(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
, sortcores()
, fincount()
, worklist()
- , cache()
, reach()
, state()
, gor1_topsort()
ctx.step = 0;
ctx.rule = Rule::NONE;
ctx.cursor = ctx.marker = string;
- ctx.cache.clear();
ctx.histlevel.clear();
ctx.sortcores.clear();
DASSERT(ctx.worklist.empty());
namespace re2c {
namespace libre2c {
-static void reach_on_symbol(lctx_t &, uint32_t);
-static void closure_leftmost(lctx_t &);
-static void update_offsets(lctx_t &ctx, const conf_t &c);
+static void reach_on_symbol(lsimctx_t &, uint32_t);
+static void closure_leftmost(lsimctx_t &);
+static void update_offsets(lsimctx_t &ctx, const conf_t &c);
int regexec_nfa_leftmost(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- lctx_t &ctx = *static_cast<lctx_t*>(preg->simctx);
+ lsimctx_t &ctx = *static_cast<lsimctx_t*>(preg->simctx);
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
return 0;
}
-void reach_on_symbol(lctx_t &ctx, uint32_t sym)
+void reach_on_symbol(lsimctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
ctx.history.init();
}
-void closure_leftmost(lctx_t &ctx)
+void closure_leftmost(lsimctx_t &ctx)
{
confset_t &state = ctx.state, &wl = ctx.reach;
state.clear();
wl.push_back(conf_t(n->alt.out1, o, h));
break;
case nfa_state_t::TAG:
- wl.push_back(conf_t(n->tag.out, o
- , ctx.history.push(h, n->tag.info)));
+ wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x)));
break;
default:
break;
}
}
-void update_offsets(lctx_t &ctx, const conf_t &c)
+void update_offsets(lsimctx_t &ctx, const conf_t &c)
{
const size_t nsub = ctx.nsub;
bool *done = ctx.done;
memset(done, 0, nsub * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const lctx_t::history_t::node_t &n = ctx.history.node(i);
+ const lsimctx_t::history_t::node_t &n = ctx.history.node(i);
const size_t t = n.info.idx;
if (!done[t]) {
done[t] = true;
namespace re2c {
namespace libre2c {
-static void reach_on_symbol(lzctx_t &, uint32_t);
-static void closure_leftmost(lzctx_t &);
+static void reach_on_symbol(lzsimctx_t &, uint32_t);
+static void closure_leftmost(lzsimctx_t &);
int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- lzctx_t &ctx = *static_cast<lzctx_t*>(preg->simctx);
+ lzsimctx_t &ctx = *static_cast<lzsimctx_t*>(preg->simctx);
init(ctx, string);
nfa_state_t *s0 = ctx.nfa.root;
return finalize(ctx, string, nmatch, pmatch);
}
-void reach_on_symbol(lzctx_t &ctx, uint32_t sym)
+void reach_on_symbol(lzsimctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
}
}
-void closure_leftmost(lzctx_t &ctx)
+void closure_leftmost(lzsimctx_t &ctx)
{
confset_t &state = ctx.state, &wl = ctx.reach;
state.clear();
wl.push_back(conf_t(n->alt.out1, o, h));
break;
case nfa_state_t::TAG:
- wl.push_back(conf_t(n->tag.out, o
- , ctx.history.push(h, ctx.step, n->tag.info, o)));
+ wl.push_back(conf_t(n->tag.out, o, ctx.history.link(ctx, x)));
break;
case nfa_state_t::RAN:
break;
namespace re2c {
namespace libre2c {
-static void make_one_step(pctx_t &, uint32_t);
-static void make_final_step(pctx_t &);
-static void update_offsets(pctx_t &ctx, const conf_t &c, uint32_t id);
-static void compute_prectbl_naive(pctx_t &ctx);
+static void make_one_step(psimctx_t &, uint32_t);
+static void make_final_step(psimctx_t &);
+static void update_offsets(psimctx_t &ctx, const conf_t &c, uint32_t id);
+static void compute_prectbl_naive(psimctx_t &ctx);
// we *do* want these to be inlined
-static inline void closure_posix(pctx_t &ctx);
+static inline void closure_posix(psimctx_t &ctx);
int regexec_nfa_posix(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int /* eflags */)
{
- pctx_t &ctx = *static_cast<pctx_t*>(preg->simctx);
+ psimctx_t &ctx = *static_cast<psimctx_t*>(preg->simctx);
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
return 0;
}
-void closure_posix(pctx_t &ctx)
+void closure_posix(psimctx_t &ctx)
{
ctx.history.detach();
}
}
-void make_one_step(pctx_t &ctx, uint32_t sym)
+void make_one_step(psimctx_t &ctx, uint32_t sym)
{
confset_t &state = ctx.state, &reach = ctx.reach;
uint32_t j = 0;
++ctx.step;
}
-void make_final_step(pctx_t &ctx)
+void make_final_step(psimctx_t &ctx)
{
for (cconfiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
nfa_state_t *s = i->state;
}
}
-void update_offsets(pctx_t &ctx, const conf_t &c, uint32_t id)
+void update_offsets(psimctx_t &ctx, const conf_t &c, uint32_t id)
{
const size_t nsub = ctx.nsub;
regoff_t *o;
// Old naive algorithm that has cubic complexity in the size of TNFA.
// Example that exhibits cubic behaviour is ((a?){1,N})*. In this example
// closure has O(N) states, and the compared histories have O(N) length.
-void compute_prectbl_naive(pctx_t &ctx)
+void compute_prectbl_naive(psimctx_t &ctx)
{
const confset_t &state = ctx.state;
int32_t *newtbl = ctx.newprectbl;
newtbl[i * newdim + i] = p0;
for (uint32_t j = i + 1; j < newdim; ++j) {
int32_t prec1, prec2;
- int32_t prec = precedence(ctx, state[i], state[j], prec1, prec2);
+ int32_t prec = psimctx_t::history_t::precedence(ctx, state[i], state[j], prec1, prec2);
newtbl[i * newdim + j] = pack(prec1, prec);
newtbl[j * newdim + i] = pack(prec2, -prec);
}
#include "lib/regex_impl.h"
#include "src/options/opt.h"
#include "src/debug/debug.h"
+#include "src/dfa/closure_posix.h"
#include "src/dfa/determinization.h"
#include "src/dfa/posix_precedence.h"
#include "src/nfa/nfa.h"
namespace re2c {
namespace libre2c {
-/* note [lazy computation and caching of precedence]
- *
- * Eagerly computing precedence values on each step for each pair of closure
- * states is a waste of time: most of these values are not needed, because
- * RE may be unambigous, or the given input string may be unambigous, or even
- * if there is ambiguity, it may take only a few comparisons to resolve. All
- * the rest is wasted effort.
- *
- * We can avoid it by delaying precedence computation until necessary, and
- * then unwinding all the steps backwards, computing precedence for each step
- * and caching the computed values (so that the same pair of histories is not
- * compared twice). It is still the same incremental comparison as with
- * precedence matrices: we compare step by step, going from the fork frame to
- * the join frame. The result of comparison on each step is folded to a triple
- * of numbers and recorded in cache. It is important that we do record each
- * step, not just the final step, because the next pair of ambiguous histories
- * may unwind to the same pair of prefixes that was compared before.
- *
- * For all this to work, it is necessary that we are able to store all history
- * until the end, because at any step we might need to unwind an arbitrary
- * number of steps back. We also need to address individual subhistories
- * efficiently in order to use them as keys in the cache. All this is achieved
- * by storing history in the form of a trie and addressing individual
- * histories by indices in that trie. We also use trie to compute the final
- * tag values (instead of storing tags in registers at each step).
- */
-
-static void make_step(pzctx_t &, uint32_t);
-static void make_final_step(pzctx_t &);
-static void closure_posix(pzctx_t &);
-static int32_t precedence(pzctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
-static int32_t precedence_(pzctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
-
-// we *do* want this to be inlined
-static inline void relax(pzctx_t &, const conf_t &);
-static inline uint32_t get_step(const zhistory_t &hist, int32_t idx);
-static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx);
+static void make_step(pzsimctx_t &, uint32_t);
+static void make_final_step(pzsimctx_t &);
int regexec_nfa_posix_trie(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- pzctx_t &ctx = *static_cast<pzctx_t*>(preg->simctx);
+ pzsimctx_t &ctx = *static_cast<pzsimctx_t*>(preg->simctx);
init(ctx, string);
nfa_state_t *s0 = ctx.nfa.root;
const conf_t c0(s0, s0->coreid, HROOT);
ctx.reach.push_back(c0);
- closure_posix(ctx);
+ closure_posix_gtop(ctx);
for (;;) {
const uint32_t sym = static_cast<uint8_t>(*ctx.cursor++);
if (ctx.state.empty() || sym == 0) break;
make_step(ctx, sym);
- closure_posix(ctx);
+ closure_posix_gtop(ctx);
}
make_final_step(ctx);
return finalize(ctx, string, nmatch, pmatch);
}
-void make_step(pzctx_t &ctx, uint32_t sym)
+void make_step(pzsimctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
++ctx.step;
}
-void make_final_step(pzctx_t &ctx)
+void make_final_step(pzsimctx_t &ctx)
{
for (confiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
nfa_state_t *s = i->state;
}
}
-void closure_posix(pzctx_t &ctx)
-{
- const confset_t &reach = ctx.reach;
- confset_t &state = ctx.state;
- gtop_heap_t &heap = ctx.gtop_heap;
-
- state.clear();
-
- for (cconfiter_t c = reach.begin(); c != reach.end(); ++c) {
- relax(ctx, *c);
- }
-
- for (; !heap.empty(); ) {
- nfa_state_t *q = heap.top();
- heap.pop();
- q->active = 0;
-
- const conf_t &x = state[q->clos];
- const uint32_t o = x.origin;
- const int32_t h = x.thist;
-
- switch (q->type) {
- case nfa_state_t::NIL:
- relax(ctx, conf_t(q->nil.out, o, h));
- break;
- case nfa_state_t::ALT:
- relax(ctx, conf_t(q->alt.out1, o, h));
- relax(ctx, conf_t(q->alt.out2, o, h));
- break;
- case nfa_state_t::TAG:
- relax(ctx, conf_t(q->tag.out, o
- , ctx.history.push(h, ctx.step, q->tag.info, o)));
- break;
- default:
- break;
- }
- }
-}
-
-void relax(pzctx_t &ctx, const conf_t &c)
-{
- confset_t &state = ctx.state;
- nfa_state_t *q = c.state;
- const uint32_t idx = q->clos;
- int32_t h1, h2;
-
- // first time we see this state
- if (idx == NOCLOS) {
- q->clos = static_cast<uint32_t>(state.size());
- state.push_back(c);
- }
-
- // States of in-degree less than 2 are not joint points;
- // the fact that we are re-scanning this state means that we found
- // a better path to some previous state. Due to the right distributivity
- // of path comparison over path concatenation (X < Y => XZ < YZ) we
- // can just propagate the new path up to the next join point.
- else if (q->indeg < 2) {
- state[idx] = c;
- }
-
- // join point; compare the new path and the old path
- else if (precedence(ctx, c.thist, state[idx].thist, h1, h2) < 0) {
- state[idx] = c;
- }
-
- // the previous path was better, discard the new one
- else {
- q = NULL;
- }
-
- if (q != NULL && !q->active) {
- q->active = 1;
- ctx.gtop_heap.push(q);
- }
-}
-
-int32_t precedence(pzctx_t &ctx, int32_t idx1, int32_t idx2
- , int32_t &prec1, int32_t &prec2)
-{
- int32_t prec = 0;
-
- // use the same cache entry for (x, y) and (y, x)
- uint32_t k1 = static_cast<uint32_t>(idx1), k2 = static_cast<uint32_t>(idx2);
- bool invert = k2 < k1;
- if (invert) std::swap(k1, k2);
- const uint64_t key = (static_cast<uint64_t>(k1) << 32) | k2;
-
- cache_t::const_iterator i = ctx.cache.find(key);
- if (i != ctx.cache.end()) {
- // use previously computed precedence values from cache
- const cache_entry_t &val = i->second;
- prec1 = val.prec1;
- prec2 = val.prec2;
- prec = val.prec;
- if (invert) {
- std::swap(prec1, prec2);
- prec = -prec;
- }
- }
- else {
- // compute precedence values and put them into cache
- prec = precedence_(ctx, idx1, idx2, prec1, prec2);
- cache_entry_t val = {prec1, prec2, prec};
- if (invert) {
- std::swap(val.prec1, val.prec2);
- val.prec = -val.prec;
- }
- ctx.cache.insert(std::make_pair(key, val));
- }
-
- return prec;
-}
-
-int32_t precedence_(pzctx_t &ctx, int32_t idx1, int32_t idx2
- , int32_t &prec1, int32_t &prec2)
-{
- if (idx1 == idx2) {
- prec1 = prec2 = MAX_RHO;
- return 0;
- }
-
- const std::vector<Tag> &tags = ctx.nfa.tags;
- zhistory_t &hist = ctx.history;
-
- int32_t prec = 0;
- prec1 = prec2 = MAX_RHO;
-
- int32_t i1 = idx1, i2 = idx2;
-
- const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2);
-
- uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2);
- const uint32_t s = std::max(s1, s2);
-
- const bool fork_frame = o1 == o2 && s1 == s2;
-
- tag_info_t info1, info2;
- for (; i1 != i2 && (s1 >= s || s2 >= s);) {
- if (s1 >= s && (i1 > i2 || s2 < s)) {
- const zhistory_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- s1 = get_step(hist, i1);
- }
- else {
- const zhistory_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- s2 = get_step(hist, i2);
- }
- }
- if (!fork_frame) {
- // recurse into previous steps (via cache)
- int32_t p1, p2;
- prec = precedence(ctx, i1, i2, p1, p2);
- prec1 = std::min(prec1, p1);
- prec2 = std::min(prec2, p2);
- }
- else if (i1 != HROOT) {
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- if (fork_frame) {
- // equal => not less
- if (i1 == idx1 && i2 == idx2) return 0;
-
- // shorter => less
- if (i1 == idx1) return -1;
- if (i2 == idx2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
- }
- else {
- return prec;
- }
-
- DASSERT(false); // unreachable
- return 0;
-}
-
-uint32_t get_step(const zhistory_t &hist, int32_t idx)
-{
- return idx == HROOT ? 0 : hist.node(idx).step;
-}
-
-uint32_t get_orig(const zhistory_t &hist, int32_t idx)
-{
- return idx == HROOT ? 0 : hist.node(idx).orig;
-}
-
} // namespace libre2c
} // namespace re2c
if (preg->flags & REG_NFA) {
if ((preg->flags & REG_TRIE) && (preg->flags & REG_LEFTMOST)) {
- delete static_cast<libre2c::lzctx_t*>(preg->simctx);
+ delete static_cast<libre2c::lzsimctx_t*>(preg->simctx);
}
else if (preg->flags & REG_TRIE) {
- delete static_cast<libre2c::pzctx_t*>(preg->simctx);
+ delete static_cast<libre2c::pzsimctx_t*>(preg->simctx);
}
else if (preg->flags & REG_LEFTMOST) {
- delete static_cast<libre2c::lctx_t*>(preg->simctx);
+ delete static_cast<libre2c::lsimctx_t*>(preg->simctx);
}
else {
- delete static_cast<libre2c::pctx_t*>(preg->simctx);
+ delete static_cast<libre2c::psimctx_t*>(preg->simctx);
}
}
else {
// DFS; linear complexity
for (; !todo.empty(); ) {
- clos_t x = todo.top();
- todo.pop();
+ const clos_t &x = todo.top();
nfa_state_t *n = x.state;
+ todo.pop();
- if (n->clos == NOCLOS) {
- n->clos = static_cast<uint32_t>(done.size());
- done.push_back(x);
-
- switch (n->type) {
- case nfa_state_t::NIL:
- x.state = n->nil.out;
- todo.push(x);
- break;
- case nfa_state_t::ALT:
- x.state = n->alt.out2;
- todo.push(x);
- x.state = n->alt.out1;
- todo.push(x);
- break;
- case nfa_state_t::TAG:
- x.state = n->tag.out;
- x.thist = ctx.history.push(x.thist, n->tag.info);
- todo.push(x);
- break;
- case nfa_state_t::RAN:
- case nfa_state_t::FIN:
- break;
- }
+ if (n->clos != NOCLOS) continue;
+
+ n->clos = static_cast<uint32_t>(done.size());
+ done.push_back(x);
+
+ switch (n->type) {
+ case nfa_state_t::NIL:
+ todo.push(clos_t(x, n->nil.out));
+ break;
+ case nfa_state_t::ALT:
+ todo.push(clos_t(x, n->alt.out2));
+ todo.push(clos_t(x, n->alt.out1));
+ break;
+ case nfa_state_t::TAG:
+ todo.push(clos_t(x, n->tag.out, ctx.history.link(ctx, x)));
+ break;
+ default:
+ break;
}
}
break;
case nfa_state_t::TAG:
if (q->arcidx == 0) {
- any |= relax_gor1(ctx, conf_t(x, q->tag.out
- , ctx.history.push(x.thist, q->tag.info)));
+ any |= relax_gor1(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
++q->arcidx;
}
break;
state.push_back(x);
}
else if (q->indeg < 2
- || precedence(ctx, x, state[idx], p1, p2) < 0) {
+ || ctx_t::history_t::precedence(ctx, x, state[idx], p1, p2) < 0) {
state[idx] = x;
}
else {
relax_gtop(ctx, conf_t(x, q->alt.out2));
break;
case nfa_state_t::TAG:
- relax_gtop(ctx, conf_t(x, q->tag.out
- , ctx.history.push(x.thist, q->tag.info)));
+ relax_gtop(ctx, conf_t(x, q->tag.out, ctx.history.link(ctx, x)));
break;
default:
break;
state.push_back(c);
}
else if (q->indeg < 2
- || precedence(ctx, c, state[idx], p1, p2) < 0) {
+ || ctx_t::history_t::precedence(ctx, c, state[idx], p1, p2) < 0) {
state[idx] = c;
}
else {
namespace re2c {
+/* note [lazy computation and caching of precedence]
+ *
+ * Eagerly computing precedence values on each step for each pair of closure
+ * states is a waste of time: most of these values are not needed, because
+ * RE may be unambigous, or the given input string may be unambigous, or even
+ * if there is ambiguity, it may take only a few comparisons to resolve. All
+ * the rest is wasted effort.
+ *
+ * We can avoid it by delaying precedence computation until necessary, and
+ * then unwinding all the steps backwards, computing precedence for each step
+ * and caching the computed values (so that the same pair of histories is not
+ * compared twice). It is still the same incremental comparison as with
+ * precedence matrices: we compare step by step, going from the fork frame to
+ * the join frame. The result of comparison on each step is folded to a triple
+ * of numbers and recorded in cache. It is important that we do record each
+ * step, not just the final step, because the next pair of ambiguous histories
+ * may unwind to the same pair of prefixes that was compared before.
+ *
+ * For all this to work, it is necessary that we are able to store all history
+ * until the end, because at any step we might need to unwind an arbitrary
+ * number of steps back. We also need to address individual subhistories
+ * efficiently in order to use them as keys in the cache. All this is achieved
+ * by storing history in the form of a trie and addressing individual
+ * histories by indices in that trie. We also use trie to compute the final
+ * tag values (instead of storing tags in registers at each step).
+ */
+
// maximum 29-bit (we have 30 bits, but highest must be non-negative)
static const int32_t MAX_RHO = 0x1fffFFFF;
-inline int32_t unpack_longest(int32_t packed)
-{
- // take lower 30 bits and sign-extend
- return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
-}
-
-inline int32_t unpack_leftmost(int32_t packed)
-{
- // take higher 2 bits and sign-extend
- return packed >> 30u;
-}
-
-inline int32_t pack(int32_t longest, int32_t leftmost)
-{
- // avoid signed overflows by using unsigned arithmetics
- uint32_t u_longest = static_cast<uint32_t>(longest);
- uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
-
- // leftmost: higher 2 bits, longest: lower 30 bits
- uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
- int32_t packed = static_cast<int32_t>(u_packed);
-
- DASSERT(unpack_longest(packed) == longest
- && unpack_leftmost(packed) == leftmost);
+template<typename ctx_t> static int32_t zprecedence1(ctx_t &ctx
+ , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
+template<typename ctx_t> static int32_t zprecedence2(ctx_t &ctx
+ , hidx_t idx1, hidx_t idx2, int32_t &prec1, int32_t &prec2);
- return packed;
-}
+// we *do* want this to be inlined
+static inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2);
+static inline int32_t unpack_longest(int32_t packed);
+static inline int32_t unpack_leftmost(int32_t packed);
+static inline int32_t pack(int32_t longest, int32_t leftmost);
+static inline uint32_t get_step(const zhistory_t &hist, int32_t idx);
+static inline uint32_t get_orig(const zhistory_t &hist, int32_t idx);
-inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2)
-{
- // equal => not less
- if (last1 && last2) return 0;
-
- // shorter => less
- if (last1) return -1;
- if (last2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- // positive vs positive: smaller wins
- // (this case is only possible because multiple
- // top-level RE don't have proper negative tags)
- if (tag1 < tag2) return -1;
- if (tag1 > tag2) return 1;
-
- DASSERT(false);
- return 0;
-}
-
-template<typename ctx_t, typename conf_t>
-int32_t precedence(ctx_t &ctx, const conf_t &x, const conf_t &y
+template<typename ctx_t>
+int32_t phistory_t::precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
, int32_t &prec1, int32_t &prec2)
{
prec1 = prec2 = MAX_RHO;
: leftprec(info1, info2, i1 == idx1, i2 == idx2);
}
+int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2)
+{
+ // equal => not less
+ if (last1 && last2) return 0;
+
+ // shorter => less
+ if (last1) return -1;
+ if (last2) return 1;
+
+ const uint32_t tag1 = info1.idx, tag2 = info2.idx;
+ const bool neg1 = info1.neg, neg2 = info2.neg;
+
+ // can't be both closing
+ DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
+
+ // closing vs opening: closing wins
+ if (tag1 % 2 == 1) return -1;
+ if (tag2 % 2 == 1) return 1;
+
+ // can't be both negative
+ DASSERT(!(neg1 && neg2));
+
+ // positive vs negative: positive wins
+ if (neg1) return 1;
+ if (neg2) return -1;
+
+ // positive vs positive: smaller wins
+ // (this case is only possible because multiple
+ // top-level RE don't have proper negative tags)
+ if (tag1 < tag2) return -1;
+ if (tag1 > tag2) return 1;
+
+ DASSERT(false);
+ return 0;
+}
+
template<typename ctx_t>
void compute_prectable(ctx_t &ctx)
{
}
}
+template<typename ctx_t>
+int32_t zhistory_t::precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
+ , int32_t &prec1, int32_t &prec2)
+{
+ return zprecedence1(ctx, x.thist, y.thist, prec1, prec2);
+}
+
+// see note [lazy computation and caching of precedence]
+template<typename ctx_t>
+int32_t zprecedence1(ctx_t &ctx, hidx_t idx1, hidx_t idx2
+ , int32_t &prec1, int32_t &prec2)
+{
+ zhistory_t::cache_t &cache = ctx.history.cache;
+ int32_t prec = 0;
+
+ // use the same cache entry for (x, y) and (y, x)
+ uint32_t k1 = static_cast<uint32_t>(idx1), k2 = static_cast<uint32_t>(idx2);
+ bool invert = k2 < k1;
+ if (invert) std::swap(k1, k2);
+ const uint64_t key = (static_cast<uint64_t>(k1) << 32) | k2;
+
+ zhistory_t::cache_t::const_iterator i = cache.find(key);
+ if (i != cache.end()) {
+ // use previously computed precedence values from cache
+ const zhistory_t::cache_entry_t &val = i->second;
+ prec1 = val.prec1;
+ prec2 = val.prec2;
+ prec = val.prec;
+ if (invert) {
+ std::swap(prec1, prec2);
+ prec = -prec;
+ }
+ }
+ else {
+ // compute precedence values and put them into cache
+ prec = zprecedence2(ctx, idx1, idx2, prec1, prec2);
+ zhistory_t::cache_entry_t val = {prec1, prec2, prec};
+ if (invert) {
+ std::swap(val.prec1, val.prec2);
+ val.prec = -val.prec;
+ }
+ cache.insert(std::make_pair(key, val));
+ }
+
+ return prec;
+}
+
+// see note [lazy computation and caching of precedence]
+template<typename ctx_t>
+int32_t zprecedence2(ctx_t &ctx, hidx_t idx1, hidx_t idx2
+ , int32_t &prec1, int32_t &prec2)
+{
+ if (idx1 == idx2) {
+ prec1 = prec2 = MAX_RHO;
+ return 0;
+ }
+
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ zhistory_t &hist = ctx.history;
+
+ int32_t prec = 0;
+ prec1 = prec2 = MAX_RHO;
+
+ int32_t i1 = idx1, i2 = idx2;
+
+ const uint32_t o1 = get_orig(hist, idx1), o2 = get_orig(hist, idx2);
+
+ uint32_t s1 = get_step(hist, i1), s2 = get_step(hist, i2);
+ const uint32_t s = std::max(s1, s2);
+
+ const bool fork_frame = o1 == o2 && s1 == s2;
+
+ tag_info_t info1, info2;
+ for (; i1 != i2 && (s1 >= s || s2 >= s);) {
+ if (s1 >= s && (i1 > i2 || s2 < s)) {
+ const zhistory_t::node_t &n = hist.node(i1);
+ info1 = n.info;
+ prec1 = std::min(prec1, tags[info1.idx].height);
+ i1 = n.pred;
+ s1 = get_step(hist, i1);
+ }
+ else {
+ const zhistory_t::node_t &n = hist.node(i2);
+ info2 = n.info;
+ prec2 = std::min(prec2, tags[info2.idx].height);
+ i2 = n.pred;
+ s2 = get_step(hist, i2);
+ }
+ }
+ if (!fork_frame) {
+ // recurse into previous steps (via cache)
+ int32_t p1, p2;
+ prec = zprecedence1(ctx, i1, i2, p1, p2);
+ prec1 = std::min(prec1, p1);
+ prec2 = std::min(prec2, p2);
+ }
+ else if (i1 != HROOT) {
+ const int32_t h = tags[hist.node(i1).info.idx].height;
+ prec1 = std::min(prec1, h);
+ prec2 = std::min(prec2, h);
+ }
+
+ // longest precedence
+ if (prec1 > prec2) return -1;
+ if (prec1 < prec2) return 1;
+
+ // leftmost precedence
+ return !fork_frame ? prec
+ : leftprec(info1, info2, i1 == idx1, i2 == idx2);
+}
+
+int32_t unpack_longest(int32_t packed)
+{
+ // take lower 30 bits and sign-extend
+ return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
+}
+
+int32_t unpack_leftmost(int32_t packed)
+{
+ // take higher 2 bits and sign-extend
+ return packed >> 30u;
+}
+
+int32_t pack(int32_t longest, int32_t leftmost)
+{
+ // avoid signed overflows by using unsigned arithmetics
+ uint32_t u_longest = static_cast<uint32_t>(longest);
+ uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
+
+ // leftmost: higher 2 bits, longest: lower 30 bits
+ uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
+ int32_t packed = static_cast<int32_t>(u_packed);
+
+ DASSERT(unpack_longest(packed) == longest
+ && unpack_leftmost(packed) == leftmost);
+
+ return packed;
+}
+
+uint32_t get_step(const zhistory_t &hist, int32_t idx)
+{
+ return idx == HROOT ? 0 : hist.node(idx).step;
+}
+
+uint32_t get_orig(const zhistory_t &hist, int32_t idx)
+{
+ return idx == HROOT ? 0 : hist.node(idx).orig;
+}
+
} // namespace re2c
#endif // _RE2C_DFA_POSIX_PRECEDENCE_
#include <stddef.h>
#include "src/util/c99_stdint.h"
#include <sys/types.h>
+#include <map>
#include <vector>
#include "src/regexp/tag.h"
inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
inline arc_t &arc(hidx_t i) { return arcs[static_cast<uint32_t>(i)]; }
inline const arc_t &arc(hidx_t i) const { return arcs[static_cast<uint32_t>(i)]; }
- inline hidx_t push(hidx_t idx, tag_info_t info);
+ template<typename ctx_t> inline hidx_t link(ctx_t &ctx
+ , const typename ctx_t::conf_t &conf);
+ template<typename ctx_t> static int32_t precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
+ , int32_t &prec1, int32_t &prec2);
FORBID_COPY(phistory_t);
};
inline void init();
inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
- inline hidx_t push(hidx_t idx, tag_info_t info);
+ template<typename ctx_t> inline hidx_t link(ctx_t &ctx
+ , const typename ctx_t::conf_t &conf);
FORBID_COPY(lhistory_t);
};
struct node_t {
tag_info_t info;
hidx_t pred;
- uint32_t step;
uint32_t orig;
+ uint32_t step;
+
+ inline node_t(tag_info_t info, hidx_t pred, uint32_t orig, uint32_t step)
+ : info(info), pred(pred), orig(orig), step(step) {}
+ };
- inline node_t(tag_info_t info, hidx_t pred, uint32_t step, uint32_t orig)
- : info(info), pred(pred), step(step), orig(orig) {}
+ struct cache_entry_t
+ {
+ int32_t prec1;
+ int32_t prec2;
+ int32_t prec;
};
+ typedef std::map<uint64_t, cache_entry_t> cache_t;
std::vector<node_t> nodes;
+ cache_t cache;
- inline zhistory_t(): nodes() { init(); }
+ inline zhistory_t(): nodes(), cache() { init(); }
inline void init();
inline node_t &node(hidx_t i) { return nodes[static_cast<uint32_t>(i)]; }
inline const node_t &node(hidx_t i) const { return nodes[static_cast<uint32_t>(i)]; }
- inline hidx_t push(hidx_t idx, uint32_t step, tag_info_t info, uint32_t orig);
+ template<typename ctx_t> inline hidx_t link(ctx_t &ctx
+ , const typename ctx_t::conf_t &conf);
+ template<typename ctx_t> static int32_t precedence(ctx_t &ctx
+ , const typename ctx_t::conf_t &x, const typename ctx_t::conf_t &y
+ , int32_t &prec1, int32_t &prec2);
FORBID_COPY(zhistory_t);
};
{
nodes.clear();
nodes.push_back(node_t(NOINFO, -1, 0, 0));
+ cache.clear();
}
void phistory_t::detach()
n.finidx = NONFIN;
}
-int32_t phistory_t::push(int32_t idx, tag_info_t info)
+template<typename ctx_t>
+hidx_t phistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf)
{
+ const hidx_t idx = conf.thist;
const int32_t i = static_cast<int32_t>(nodes.size());
if (idx != -1) {
node_t &n = node(idx);
}
n.last = a;
}
- nodes.push_back(node_t(info, idx, -1, -1));
+ nodes.push_back(node_t(conf.state->tag.info, idx, -1, -1));
return i;
}
-int32_t lhistory_t::push(int32_t idx, tag_info_t info)
+template<typename ctx_t>
+hidx_t lhistory_t::link(ctx_t &/* ctx */, const typename ctx_t::conf_t &conf)
{
const int32_t i = static_cast<int32_t>(nodes.size());
- nodes.push_back(node_t(info, idx));
+ nodes.push_back(node_t(conf.state->tag.info, conf.thist));
return i;
}
-int32_t zhistory_t::push(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig)
+template<typename ctx_t>
+hidx_t zhistory_t::link(ctx_t &ctx, const typename ctx_t::conf_t &conf)
{
const int32_t i = static_cast<int32_t>(nodes.size());
- nodes.push_back(node_t(info, idx, step, orig));
+ nodes.push_back(node_t(conf.state->tag.info, conf.thist, conf.origin, ctx.step));
return i;
}