src/cfg/cfg.h \
src/dfa/determinization.h \
src/dfa/dfa.h \
+ src/dfa/posix_precedence.h \
src/dfa/tag_history.h \
src/dfa/tagver_table.h \
src/dfa/tcmd.h \
src/dfa/fillpoints.cc \
src/dfa/find_state.cc \
src/dfa/minimization.cc \
- src/dfa/posix_precedence.cc \
src/dfa/tagver_table.cc \
src/dfa/tcmd.cc \
src/encoding/ebcdic/ebcdic_regexp.cc \
src/cfg/cfg.h \
src/dfa/determinization.h \
src/dfa/dfa.h \
+ src/dfa/posix_precedence.h \
src/dfa/tag_history.h \
src/dfa/tagver_table.h \
src/dfa/tcmd.h \
src/dfa/fillpoints.cc \
src/dfa/find_state.cc \
src/dfa/minimization.cc \
- src/dfa/posix_precedence.cc \
src/dfa/tagver_table.cc \
src/dfa/tcmd.cc \
src/nfa/estimate_size.cc \
dfa_t *dfa = NULL;
if (cflags & REG_NFA) {
- preg->simctx = new libre2c::simctx_t(nfa, preg->re_nsub, cflags);
+ preg->simctx = new libre2c::simctx_t(*nfa, preg->re_nsub, cflags);
}
else {
preg->char2class = new size_t[256];
: state(s), origin(o), thist(h) {}
};
-struct histleaf_t
-{
- uint32_t coreid;
- uint32_t origin;
- int32_t hidx;
- int32_t height;
-};
-
struct ran_or_fin_t
{
inline bool operator()(const conf_t &c);
struct simctx_t
{
- const nfa_t *nfa;
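+ // configuration set types; the shared precedence code in posix_precedence.h
+ // expects them as members of the context type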
+ typedef std::vector<conf_t> confset_t;
+ typedef confset_t::iterator confiter_t;
+ typedef confset_t::const_iterator cconfiter_t;
+ typedef confset_t::reverse_iterator rconfiter_t;
+ typedef confset_t::const_reverse_iterator rcconfiter_t;
+
+ const nfa_t &nfa;
const size_t nsub;
const int flags;
confset_t reach;
confset_t state;
- tag_history_t hist;
+ tag_history_t history;
int32_t hidx;
uint32_t step;
regoff_t *offsets1;
regoff_t *offsets2;
regoff_t *offsets3;
-
bool *done;
- int32_t *prectbl1;
- int32_t *prectbl2;
- cache_t cache;
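+ // precedence tables: newprectbl is filled for the current step, oldprectbl
+ // (of dimension oldprecdim) holds the table computed at the previous step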
+ int32_t *newprectbl;
+ int32_t *oldprectbl;
+ size_t oldprecdim;
std::vector<histleaf_t> histlevel;
- std::vector<const conf_t*> sortcores;
+ std::vector<uint32_t> sortcores;
std::vector<uint32_t> fincount;
std::vector<int32_t> worklist;
+ cache_t cache;
std::vector<nfa_state_t*> gor1_topsort;
std::vector<nfa_state_t*> gor1_linear;
std::vector<nfa_state_t*> gtop_heap_storage;
cmp_gtop_t gtop_cmp;
gtop_heap_t gtop_heap;
+ closure_stats_t dc_clstats;
- simctx_t(const nfa_t *nfa, size_t re_nsub, int flags);
+ simctx_t(const nfa_t &nfa, size_t re_nsub, int flags);
~simctx_t();
FORBID_COPY(simctx_t);
};
m->rm_so = 0;
m->rm_eo = ctx.marker - string - 1;
- const std::vector<Tag> &tags = ctx.nfa->tags;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
size_t todo = nmatch * 2;
bool *done = ctx.done;
memset(done, 0, ctx.nsub * sizeof(bool));
for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const Tag &tag = tags[n.info.idx];
const size_t t = tag.ncap;
if (!fictive(tag) && t < nmatch * 2 && !done[t]) {
done[t] = true;
--todo;
- const regoff_t off = n.info.neg ? -1 : static_cast<regoff_t>(ctx.hist.node2(i).step);
+ const regoff_t off = n.info.neg ? -1 : static_cast<regoff_t>(ctx.history.node2(i).step);
m = &pmatch[t / 2 + 1];
if (t % 2 == 0) {
m->rm_so = off;
return 0;
}
-simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags)
+simctx_t::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags)
: nfa(nfa)
, nsub(2 * (re_nsub - 1))
, flags(flags)
, reach()
, state()
- , hist()
+ , history()
, hidx(HROOT)
, step(0)
, rule(Rule::NONE)
, offsets2(NULL)
, offsets3(NULL)
, done(NULL)
- , prectbl1(NULL)
- , prectbl2(NULL)
- , cache()
+ , newprectbl(NULL)
+ , oldprectbl(NULL)
+ , oldprecdim(0)
, histlevel()
, sortcores()
, fincount()
, worklist()
+ , cache()
, gor1_topsort()
, gor1_linear()
, gtop_heap_storage()
, gtop_cmp()
, gtop_heap(gtop_cmp, gtop_heap_storage)
+ , dc_clstats()
{
const size_t
- nstates = nfa->size,
- ncores = nfa->ncores;
+ nstates = nfa.size,
+ ncores = nfa.ncores;
state.reserve(nstates);
reach.reserve(nstates);
offsets3 = new regoff_t[nsub];
}
if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- prectbl1 = new int32_t[ncores * ncores];
- prectbl2 = new int32_t[ncores * ncores];
+ newprectbl = new int32_t[ncores * ncores];
+ oldprectbl = new int32_t[ncores * ncores];
histlevel.reserve(ncores);
sortcores.reserve(ncores);
fincount.resize(ncores + 1);
delete[] offsets3;
}
if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- delete[] prectbl1;
- delete[] prectbl2;
+ delete[] newprectbl;
+ delete[] oldprectbl;
}
}
{
ctx.reach.clear();
ctx.state.clear();
- ctx.hist.init();
+ ctx.history.init();
ctx.hidx = HROOT;
ctx.step = 0;
ctx.rule = Rule::NONE;
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
- const conf_t c0(ctx.nfa->root, 0, HROOT);
+ const conf_t c0(ctx.nfa.root, 0, HROOT);
ctx.reach.push_back(c0);
closure_leftmost(ctx);
}
std::swap(ctx.offsets1, ctx.offsets2);
- ctx.hist.init();
+ ctx.history.init();
}
void closure_leftmost(simctx_t &ctx)
break;
case nfa_state_t::TAG:
wl.push_back(conf_t(n->tag.out, o
- , ctx.hist.push(h, n->tag.info)));
+ , ctx.history.push(h, n->tag.info)));
break;
default:
break;
memset(done, 0, nsub * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const size_t t = n.info.idx;
if (!done[t]) {
done[t] = true;
simctx_t &ctx = *preg->simctx;
init(ctx, string);
- nfa_state_t *s0 = ctx.nfa->root;
+ nfa_state_t *s0 = ctx.nfa.root;
const conf_t c0(s0, s0->coreid, HROOT);
ctx.reach.push_back(c0);
closure_leftmost(ctx);
break;
case nfa_state_t::TAG:
wl.push_back(conf_t(n->tag.out, o
- , ctx.hist.push2(h, ctx.step, n->tag.info, o)));
+ , ctx.history.push2(h, ctx.step, n->tag.info, o)));
break;
case nfa_state_t::RAN:
break;
#include "src/options/opt.h"
#include "src/debug/debug.h"
#include "src/dfa/determinization.h"
+#include "src/dfa/posix_precedence.h"
#include "src/nfa/nfa.h"
template<sssp_alg_t ALG> static void closure_posix(simctx_t &ctx);
static void make_one_step(simctx_t &, uint32_t);
static void make_final_step(simctx_t &);
-static void update_offsets(simctx_t &ctx, const conf_t &c);
-static void update_prectbl(simctx_t &);
-static void update_prectbl_naive(simctx_t &ctx);
-static int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y, int32_t &prec1, int32_t &prec2);
+static void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id);
+static void compute_prectbl_naive(simctx_t &ctx);
// we *do* want these to be inlined
static inline bool scan(simctx_t &ctx, nfa_state_t *q, bool all);
static inline bool relax_gor1(simctx_t &, const conf_t &);
static inline void relax_gtop(simctx_t &, const conf_t &);
-static inline int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2);
int regexec_nfa_posix(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int eflags)
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
- const conf_t c0(ctx.nfa->root, 0, HROOT);
+ const conf_t c0(ctx.nfa.root, 0, HROOT);
ctx.reach.push_back(c0);
closure_posix<ALG>(ctx);
for (;;) {
void make_one_step(simctx_t &ctx, uint32_t sym)
{
confset_t &state = ctx.state, &reach = ctx.reach;
- size_t j = 0;
+ uint32_t j = 0;
reach.clear();
for (cconfiter_t i = state.begin(), e = state.end(); i != e; ++i) {
if (s->type == nfa_state_t::RAN) {
for (const Range *r = s->ran.ran; r; r = r->next()) {
if (r->lower() <= sym && sym < r->upper()) {
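+ // the origin of the new configuration is its index j in the closure,
+ // which also indexes the offset arrays and the precedence table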
- const conf_t c(s->ran.out, s->coreid, HROOT);
+ const conf_t c(s->ran.out, j, HROOT);
reach.push_back(c);
- state[j++] = *i;
- update_offsets(ctx, *i);
+ state[j] = *i;
+ update_offsets(ctx, *i, j);
+ ++j;
break;
}
}
}
else if (s->type == nfa_state_t::FIN) {
- update_offsets(ctx, *i);
+ update_offsets(ctx, *i, NONCORE);
}
}
std::swap(ctx.offsets1, ctx.offsets2);
if (!(ctx.flags & REG_SLOWPREC)) {
- update_prectbl(ctx);
+ compute_prectable(ctx);
}
else {
- update_prectbl_naive(ctx);
+ compute_prectbl_naive(ctx);
}
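+ // the freshly computed table becomes the "old" one for the next step;
+ // j is the number of configurations that survived this step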
+ std::swap(ctx.newprectbl, ctx.oldprectbl);
+ ctx.oldprecdim = j;
- ctx.hist.init();
+ ctx.history.init();
++ctx.step;
}
DASSERT(s->status == GOR_NOPASS && s->active == 0);
if (s->type == nfa_state_t::FIN) {
- update_offsets(ctx, *i);
+ update_offsets(ctx, *i, NONCORE);
}
}
}
{
const uint32_t xo = x.origin, yo = y.origin;
return xo != yo
- && unpack_leftmost(ctx.prectbl1[xo * ctx.nfa->ncores + yo]) < 0;
+ && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
}
bool scan(simctx_t &ctx, nfa_state_t *q, bool all)
case nfa_state_t::TAG:
if (q->arcidx == 0) {
any |= relax_gor1(ctx, conf_t(q->tag.out, o
- , ctx.hist.push1(h, q->tag.info)));
+ , ctx.history.push1(h, q->tag.info)));
++q->arcidx;
}
break;
break;
case nfa_state_t::TAG:
relax_gtop(ctx, conf_t(q->tag.out, o
- , ctx.hist.push1(h, q->tag.info)));
+ , ctx.history.push1(h, q->tag.info)));
break;
default:
break;
}
}
-int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y
- , int32_t &prec1, int32_t &prec2)
-{
- const int32_t idx1 = x.thist, idx2 = y.thist;
- const uint32_t orig1 = x.origin, orig2 = y.origin;
-
- if (idx1 == idx2 && orig1 == orig2) {
- prec1 = prec2 = MAX_RHO;
- return 0;
- }
-
- const std::vector<Tag> &tags = ctx.nfa->tags;
- tag_history_t &hist = ctx.hist;
-
- const bool fork_frame = orig1 == orig2;
- if (fork_frame) {
- prec1 = prec2 = MAX_RHO;
- }
- else {
- prec1 = unpack_longest(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]);
- prec2 = unpack_longest(ctx.prectbl1[orig2 * ctx.nfa->ncores + orig1]);
- }
-
- tag_info_t info1, info2;
- int32_t i1 = idx1, i2 = idx2;
- for (; i1 != i2; ) {
- if (i1 > i2) {
- const tag_history_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- }
- else {
- const tag_history_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- }
- }
- if (i1 != HROOT) {
- DASSERT(fork_frame);
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- return fork_frame
- ? leftprec(ctx, info1, info2, i1 == idx1, i2 == idx2)
- : unpack_leftmost(ctx.prectbl1[orig1 * ctx.nfa->ncores + orig2]);
-}
-
-int32_t leftprec(simctx_t &, tag_info_t info1, tag_info_t info2, bool last1, bool last2)
-{
- // equal => not less
- if (last1 && last2) return 0;
-
- // shorter => less
- if (last1) return -1;
- if (last2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- DASSERT(false);
- return 0;
-}
-
-void update_offsets(simctx_t &ctx, const conf_t &c)
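+// 'id' is the index of configuration 'c' in the new closure, or NONCORE for final states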
+void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id)
{
const size_t nsub = ctx.nsub;
regoff_t *o;
- const std::vector<Tag> &tags = ctx.nfa->tags;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
nfa_state_t *s = c.state;
bool *done = ctx.done;
o = ctx.offsets3;
}
else {
- o = ctx.offsets1 + s->coreid * nsub;
+ o = ctx.offsets1 + id * nsub;
}
memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t));
memset(done, 0, nsub * sizeof(bool));
for (int32_t i = c.thist; i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.hist.node(i);
+ const tag_history_t::node_t &n = ctx.history.node(i);
const Tag &tag = tags[n.info.idx];
const size_t t = tag.ncap;
regoff_t *off = o + t;
}
}
-void update_prectbl(simctx_t &ctx)
-{
- const confset_t &state = ctx.state;
- const std::vector<Tag> &tags = ctx.nfa->tags;
- std::vector<const conf_t*> &sortcores = ctx.sortcores;
- std::vector<uint32_t> &fcount = ctx.fincount;
- std::vector<int32_t> stack = ctx.worklist;
- std::vector<histleaf_t> &level = ctx.histlevel;
- std::vector<histleaf_t>::reverse_iterator li, lj, lk, le;
- tag_history_t &hist = ctx.hist;
- const size_t ncores = ctx.nfa->ncores;
- int32_t *oldtbl = ctx.prectbl1, *newtbl = ctx.prectbl2;
-
- // Group core configurations by their history tree index, so that later
- // while traversing the tree we will know at once which configurations
- // (if any) are bound to the given tree node. We use counting sort, which
- // requires additional memory, but is fast and conveniently creates an
- // array of boundaries in the sorted configuration array.
- uint32_t maxfin = 0;
- for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
- uint32_t &x = hist.node1(c->thist).finidx;
- if (x >= USED) {
- x = maxfin++;
- fcount[x] = 0;
-
- // mark all nodes down to root as used (unless marked already)
- for (int32_t i = hist.node(c->thist).pred; i >= HROOT; ) {
- uint32_t &y = hist.node1(i).finidx;
- if (y <= USED) break;
- y = USED;
- i = hist.node(i).pred;
- }
- }
- ++fcount[x];
- }
- fcount[maxfin] = 0;
- for (size_t i = 1; i <= maxfin; ++i) {
- fcount[i] += fcount[i - 1];
- }
- sortcores.resize(state.size());
- for (rcconfiter_t c = state.rbegin(), e = state.rend(); c != e; ++c) {
- sortcores[--fcount[hist.node1(c->thist).finidx]] = &*c;
- }
-
- // Depth-first traversal of the history tree. During traversal we grow
- // an array of items (one item per core configuration). Items are added
- // in tree nodes that have core configurations associated with them.
- // Each item represents one history. Items have immutable part (core ID,
- // origin) and mutable part (current minimal height, current tree index)
- // that changes as we return down the tree.
- level.clear();
- stack.push_back(0);
- while (!stack.empty()) {
- const int32_t n = stack.back();
- tag_history_t::node1_t &node = hist.node1(n);
- const uint32_t fidx = node.finidx;
-
- if (fidx == NONFIN) {
- // aborted branch of search tree, don't waste time
- stack.pop_back();
- continue;
- }
-
- if (node.next != -1) {
- // start or continue visiting subtrees rooted at this node
- const tag_history_t::arc_t &arc = hist.arc(node.next);
- stack.push_back(arc.node);
- node.next = arc.next;
- continue;
- }
-
- // all subtrees visited, it's time to process this node
- const int32_t h = n == 0 ? MAX_RHO : tags[hist.node(n).info.idx].height;
- li = level.rbegin();
- le = level.rend();
-
- if (fidx < USED) {
- // this node has leaf configurations, add them to level
- for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) {
- const conf_t *c = sortcores[k];
- const histleaf_t l = {c->state->coreid, c->origin, HROOT, h};
- level.push_back(l);
- }
-
- // compute precedence for newly added configurations
- const int32_t p0 = pack(h, 0);
- for (lj = level.rbegin(); lj != li; ++lj) {
- for (lk = lj; lk != li; ++lk) {
- const uint32_t cj = lj->coreid, ck = lk->coreid;
- const uint32_t oj = lj->origin, ok = lk->origin;
- const bool fork = n != 0 || oj == ok;
- if (fork) {
- newtbl[cj * ncores + ck] = p0;
- newtbl[ck * ncores + cj] = p0;
- }
- else {
- newtbl[cj * ncores + ck] = oldtbl[oj * ncores + ok];
- newtbl[ck * ncores + cj] = oldtbl[ok * ncores + oj];
- }
- }
- }
- }
-
- // Each subtree appended a sequence of items to level. We can find
- // sequence boundaries by looking at tree index of each item: it is
- // equal to tree index of the corresponding subtree (except for the
- // leaf items added at this node; but we know where they start).
-
- // We must compute precedence for each pair of items from different
- // sequences (including leaf items added at this node), but not within
- // sequence boundaries: those histories fork higher up the subtree;
- // their precedence has already been computed and must not be touched.
-
- for (int32_t a = node.last; a != -1; ) {
- const tag_history_t::arc_t &arc = hist.arc(a);
- a = arc.prev;
-
- // for all the items of this subtree
- for (lk = li; li != le && li->hidx == arc.node; ++li) {
-
- // update height of each item coming from subtree
- li->height = std::min(li->height, h);
-
- // for all the level items to the right of this subtree
- for (lj = level.rbegin(); lj != lk; ++lj) {
-
- const uint32_t ci = li->coreid, cj = lj->coreid;
- const uint32_t oi = li->origin, oj = lj->origin;
- const bool fork = n != 0 || oi == oj;
- int32_t p1 = li->height, p2 = lj->height, p;
-
- if (!fork) {
- p1 = std::min(p1, unpack_longest(oldtbl[oi * ncores + oj]));
- p2 = std::min(p2, unpack_longest(oldtbl[oj * ncores + oi]));
- }
-
- if (p1 > p2) {
- p = -1;
- }
- else if (p1 < p2) {
- p = 1;
- }
- else if (fork) {
- const tag_info_t t1 = hist.node(li->hidx).info;
- const tag_info_t t2 = hist.node(lj->hidx).info;
- p = leftprec(ctx, t1, t2, t1 == NOINFO, t2 == NOINFO);
- }
- else {
- p = unpack_leftmost(oldtbl[oi * ncores + oj]);
- }
-
- newtbl[ci * ncores + cj] = pack(p1, p);
- newtbl[cj * ncores + ci] = pack(p2, -p);
- }
- }
- }
-
- // finally, downgrade tree index of all subtree items, making their
- // origins indistinguishable from each other for the previous level
- for (lj = level.rbegin(); lj != li; ++lj) {
- lj->hidx = n;
- }
-
- stack.pop_back();
- }
-
- std::swap(ctx.prectbl1, ctx.prectbl2);
-}
-
// Old naive algorithm that has cubic complexity in the size of TNFA.
// Example that exhibits cubic behaviour is ((a?){1,N})*. In this example
// closure has O(N) states, and the compared histories have O(N) length.
-void update_prectbl_naive(simctx_t &ctx)
+void compute_prectbl_naive(simctx_t &ctx)
{
const confset_t &state = ctx.state;
- const size_t ncores = ctx.nfa->ncores;
- int32_t *newtbl = ctx.prectbl2;
+ int32_t *newtbl = ctx.newprectbl;
+ const size_t newdim = state.size();
const int32_t p0 = pack(MAX_RHO, 0);
- for (cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
- nfa_state_t *s = c->state;
- DASSERT (s->type == nfa_state_t::RAN);
- newtbl[s->coreid * ncores + s->coreid] = p0;
-
- for (cconfiter_t d = c + 1; d != e; ++d) {
- nfa_state_t *q = d->state;
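+ // the table is now indexed by configuration positions in the closure, not by NFA core ids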
+ for (uint32_t i = 0; i < newdim; ++i) {
+ newtbl[i * newdim + i] = p0;
+ for (uint32_t j = i + 1; j < newdim; ++j) {
int32_t prec1, prec2;
- int32_t prec = precedence(ctx, *c, *d, prec1, prec2);
- newtbl[s->coreid * ncores + q->coreid] = pack(prec1, prec);
- newtbl[q->coreid * ncores + s->coreid] = pack(prec2, -prec);
+ int32_t prec = precedence(ctx, state[i], state[j], prec1, prec2);
+ newtbl[i * newdim + j] = pack(prec1, prec);
+ newtbl[j * newdim + i] = pack(prec2, -prec);
}
}
-
- std::swap(ctx.prectbl1, ctx.prectbl2);
}
} // namespace libre2c
simctx_t &ctx = *preg->simctx;
init(ctx, string);
- nfa_state_t *s0 = ctx.nfa->root;
+ nfa_state_t *s0 = ctx.nfa.root;
const conf_t c0(s0, s0->coreid, HROOT);
ctx.reach.push_back(c0);
closure_posix(ctx);
break;
case nfa_state_t::TAG:
relax(ctx, conf_t(q->tag.out, o
- , ctx.hist.push2(h, ctx.step, q->tag.info, o)));
+ , ctx.history.push2(h, ctx.step, q->tag.info, o)));
break;
default:
break;
return 0;
}
- const std::vector<Tag> &tags = ctx.nfa->tags;
- tag_history_t &hist = ctx.hist;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &hist = ctx.history;
int32_t prec = 0;
prec1 = prec2 = MAX_RHO;
T4("(ab|a)(bcd|c)(d|.*)", "abcd", 0,4, 0,2, 2,3, 3,4);
if (!(flags & REG_NFA)) {
- T3("((a?){1,100})*", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0,50, 0,50, 49,50);
+ T3("((a?){1,300})*", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0,50, 0,50, 49,50);
}
else if (!(flags & REG_SLOWPREC)) {
T3("((a?){1,1000})*", "aaaa", 0,4, 0,4, 3,4);
{
if (!debug) return;
- const closure_t &closure = ctx.dc_closure;
+ const closure_t &closure = ctx.state;
cclositer_t b = closure.begin(), e = closure.end(), c;
const uint32_t origin = ctx.dc_origin;
const uint32_t target = ctx.dc_target;
const uint32_t symbol = ctx.dc_symbol;
- const dfa_t &dfa = ctx.dc_dfa;
+ const dfa_t &dfa = ctx.dfa;
const tagver_table_t &tvtbl = ctx.dc_tagvertbl;
- const tag_history_t &thist = ctx.dc_taghistory;
+ const tag_history_t &thist = ctx.history;
uint32_t i;
if (target == dfa_t::NIL) return;
i = 0;
for (c = b; c != e; ++c, ++i) {
fprintf(stderr, "<TR><TD ALIGN=\"left\" PORT=\"%u\"%s>%u",
- i, style, static_cast<uint32_t>(c->state - ctx.dc_nfa.states));
+ i, style, static_cast<uint32_t>(c->state - ctx.nfa.states));
if (c->tvers != ZERO_TAGS) {
const tagver_t *vers = tvtbl[c->tvers];
fprintf(stderr, " %s%d", tagname(dfa.tags[t]), abs(vers[t]));
}
- if (c->tlook != HROOT) {
- dump_history(dfa, thist, c->tlook);
+ if (c->thist != HROOT) {
+ dump_history(dfa, thist, c->thist);
}
}
#include "src/options/opt.h"
#include "src/dfa/determinization.h"
#include "src/dfa/dfa.h"
+#include "src/dfa/posix_precedence.h"
#include "src/dfa/tcmd.h"
#include "src/nfa/nfa.h"
#include "src/regexp/rule.h"
void tagged_epsilon_closure(determ_context_t &ctx)
{
- closure_t &closure = ctx.dc_closure;
+ closure_t &closure = ctx.state;
// build tagged epsilon-closure of the given set of NFA states
if (ctx.dc_opts->posix_semantics) {
closure_posix(ctx);
- prune(closure, ctx.dc_nfa.rules);
+ prune(closure, ctx.nfa.rules);
std::sort(closure.begin(), closure.end(), cmpby_rule_state);
- orders(ctx);
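+ // build the precedence table for the new closure (shared implementation in posix_precedence.h)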
+ compute_prectable(ctx);
} else {
closure_leftmost(ctx);
- prune(closure, ctx.dc_nfa.rules);
+ prune(closure, ctx.nfa.rules);
}
// see note [the difference between TDFA(0) and TDFA(1)]
void lower_lookahead_to_transition(closure_t &closure)
{
for (clositer_t c = closure.begin(); c != closure.end(); ++c) {
- c->ttran = c->tlook;
- c->tlook = HROOT;
+ c->ttran = c->thist;
+ c->thist = HROOT;
}
}
void generate_versions(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const std::vector<Tag> &tags = dfa.tags;
const size_t ntag = tags.size();
tagver_t &maxver = dfa.maxtagver;
tagver_table_t &tvtbl = ctx.dc_tagvertbl;
tagver_t *vers = tvtbl.buffer;
- closure_t &clos = ctx.dc_closure;
- tag_history_t &thist = ctx.dc_taghistory;
+ closure_t &clos = ctx.state;
+ tag_history_t &thist = ctx.history;
newvers_t &newvers = ctx.dc_newvers;
clositer_t b = clos.begin(), e = clos.end(), c;
// normal transition, however absolute value should be unique
// among all versions of all tags)
for (c = b; c != e; ++c) {
- const hidx_t l = c->tlook, h = c->ttran;
+ const hidx_t l = c->thist, h = c->ttran;
if (h == HROOT) continue;
const tagver_t *vs = tvtbl[c->tvers];
void closure_leftmost(determ_context_t &ctx)
{
const closure_t &init = ctx.dc_reached;
- closure_t &done = ctx.dc_closure;
+ closure_t &done = ctx.state;
std::stack<clos_t> &todo = ctx.dc_stack_dfs;
// enqueue all initial states
break;
case nfa_state_t::TAG:
x.state = n->tag.out;
- x.tlook = ctx.dc_taghistory.push(x.tlook, n->tag.info);
+ x.thist = ctx.history.push(x.thist, n->tag.info);
todo.push(x);
break;
case nfa_state_t::RAN:
#include <queue>
#include "src/dfa/determinization.h"
+#include "src/dfa/posix_precedence.h"
#include "src/nfa/nfa.h"
{
DRESET_CLSTATS(ctx);
- ctx.dc_taghistory.detach();
+ ctx.history.detach();
switch (ctx.dc_opts->posix_closure) {
case POSIX_CLOSURE_GOR1: closure_posix_gor1(ctx); break;
DDUMP_CLSTATS(ctx);
// cleanup
- closure_t &cl = ctx.dc_closure;
+ closure_t &cl = ctx.state;
for (clositer_t i = cl.begin(); i != cl.end(); ++i) {
nfa_state_t *q = i->state;
q->clos = NOCLOS;
*/
void closure_posix_gor1(determ_context_t &ctx)
{
- closure_t &state = ctx.dc_closure, &reach = ctx.dc_reached;
+ closure_t &state = ctx.state, &reach = ctx.dc_reached;
std::vector<nfa_state_t*>
&topsort = ctx.dc_gor1_topsort,
&linear = ctx.dc_gor1_linear;
inline bool cmp_gor1_t::operator()(const clos_t &x, const clos_t &y) const
{
- const uint32_t xo = x.origin, yo = y.origin;
- if (xo == yo) return false;
-
// if longest components differ, leftmost already incorporates that
- const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
- return unpack_leftmost(k->prectbl[xo * k->size + yo]) < 0;
+ const uint32_t xo = x.origin, yo = y.origin;
+ return xo != yo
+ && unpack_leftmost(ctx.oldprectbl[xo * ctx.oldprecdim + yo]) < 0;
}
bool scan(determ_context_t &ctx, nfa_state_t *q, bool all)
{
bool any = false;
- clos_t x = ctx.dc_closure[q->clos];
+ clos_t x = ctx.state[q->clos];
switch (q->type) {
case nfa_state_t::NIL:
if (q->arcidx == 0) {
case nfa_state_t::TAG:
if (q->arcidx == 0) {
x.state = q->tag.out;
- x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info);
+ x.thist = ctx.history.push1(x.thist, q->tag.info);
any |= relax_gor1(ctx, x);
++q->arcidx;
}
bool relax_gor1(determ_context_t &ctx, const clos_t &x)
{
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
nfa_state_t *q = x.state;
const uint32_t idx = q->clos;
int32_t p1, p2;
void closure_posix_gtop(determ_context_t &ctx)
{
const closure_t &reach = ctx.dc_reached;
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
gtop_heap_t &heap = ctx.dc_gtop_heap;
state.clear();
q->active = 0;
DINCCOUNT_CLSCANS(ctx);
- clos_t x = ctx.dc_closure[q->clos];
+ clos_t x = ctx.state[q->clos];
switch (q->type) {
case nfa_state_t::NIL:
x.state = q->nil.out;
break;
case nfa_state_t::TAG:
x.state = q->tag.out;
- x.tlook = ctx.dc_taghistory.push1(x.tlook, q->tag.info);
+ x.thist = ctx.history.push1(x.thist, q->tag.info);
relax_gtop(ctx, x);
break;
default:
void relax_gtop(determ_context_t &ctx, const clos_t &c)
{
- closure_t &state = ctx.dc_closure;
+ closure_t &state = ctx.state;
nfa_state_t *q = c.state;
const uint32_t idx = q->clos;
int32_t p1, p2;
}
}
-void orders(determ_context_t &ctx)
-{
- closure_t &closure = ctx.dc_closure;
- const size_t nclos = closure.size();
-
- prectable_t *prectbl = ctx.dc_prectbl;
- static const int32_t P0 = pack(MAX_RHO, 0);
-
- for (size_t i = 0; i < nclos; ++i) {
- for (size_t j = i + 1; j < nclos; ++j) {
- int32_t rho1, rho2, l;
- l = precedence (ctx, closure[i], closure[j], rho1, rho2);
- prectbl[i * nclos + j] = pack(rho1, l);
- prectbl[j * nclos + i] = pack(rho2, -l);
- }
- prectbl[i * nclos + i] = P0;
- }
-}
-
} // namespace re2c
{
static void clear_caches(determ_context_t &ctx);
-static void reach_on_symbol(determ_context_t &);
+static void reach_on_symbol(determ_context_t &ctx, uint32_t sym);
static nfa_state_t *transition(nfa_state_t *, uint32_t);
static uint32_t init_tag_versions(determ_context_t &);
static void warn_nondeterministic_tags(const determ_context_t &);
clear_caches(ctx);
for (uint32_t c = 0; c < nchars; ++c) {
- ctx.dc_symbol = c;
-
- reach_on_symbol(ctx);
+ reach_on_symbol(ctx, c);
tagged_epsilon_closure(ctx);
find_state(ctx);
}
{
ctx.dc_newvers.clear();
- const size_t ntags = ctx.dc_nfa.tags.size();
+ const size_t ntags = ctx.nfa.tags.size();
for (size_t t = 0; t < ntags; ++t) {
ctx.dc_hc_caches[t].clear();
}
}
-void reach_on_symbol(determ_context_t &ctx)
+void reach_on_symbol(determ_context_t &ctx, uint32_t sym)
{
+ ctx.dc_symbol = sym;
+ const uint32_t symbol = ctx.dfa.charset[ctx.dc_symbol];
+
const kernel_t *kernel = ctx.dc_kernels[ctx.dc_origin];
- closure_t &reached = ctx.dc_reached;
- const uint32_t symbol = ctx.dc_dfa.charset[ctx.dc_symbol];
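+ // the origin kernel's precedence table serves as the "old" table for this transition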
+ ctx.oldprectbl = kernel->prectbl;
+ ctx.oldprecdim = kernel->size;
+ closure_t &reached = ctx.dc_reached;
reached.clear();
+
for (uint32_t i = 0; i < kernel->size; ++i) {
nfa_state_t *s = transition(kernel->state[i], symbol);
if (s) {
- clos_t c = {s, i, kernel->tvers[i], kernel->tlook[i], HROOT};
+ clos_t c = {s, i, kernel->tvers[i], kernel->thist[i], HROOT};
reached.push_back(c);
}
}
uint32_t init_tag_versions(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const size_t ntags = dfa.tags.size();
// all-zero tag configuration must have static number zero
Warn &warn = ctx.dc_msg.warn;
const kernels_t &kernels = ctx.dc_kernels;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- const std::valarray<Rule> &rules = ctx.dc_dfa.rules;
+ const std::vector<Tag> &tags = ctx.dfa.tags;
+ const std::valarray<Rule> &rules = ctx.dfa.rules;
const size_t
ntag = tags.size(),
: dc_opts(opts)
, dc_msg(msg)
, dc_condname(condname)
- , dc_nfa(nfa)
- , dc_dfa(dfa)
+ , nfa(nfa)
+ , dfa(dfa)
, dc_allocator()
, dc_origin(dfa_t::NIL)
, dc_target(dfa_t::NIL)
, dc_symbol(0)
, dc_actions(NULL)
, dc_reached()
- , dc_closure()
- , dc_prectbl()
+ , state()
, dc_tagvertbl(nfa.tags.size())
- , dc_taghistory()
+ , history()
, dc_kernels()
, dc_buffers(dc_allocator)
, dc_stack_dfs()
, dc_gtop_cmp()
, dc_gtop_heap(dc_gtop_cmp, dc_gtop_buffer)
, dc_hc_caches()
- , dc_newvers(newver_cmp_t(dc_taghistory, dc_hc_caches))
+ , dc_newvers(newver_cmp_t(history, dc_hc_caches))
, dc_path1()
, dc_path2()
, dc_path3()
, dc_tagcount()
+ , newprectbl(NULL)
+ , oldprectbl(NULL)
+ , oldprecdim(0)
+ , histlevel()
+ , sortcores()
+ , fincount()
+ , worklist()
, dc_dump(opts)
, dc_clstats()
{
dc_tagcount.resize(ntags);
if (opts->posix_semantics) {
- dc_prectbl = new prectable_t[ncores * ncores];
+ newprectbl = new prectable_t[ncores * ncores];
+ histlevel.reserve(ncores);
+ sortcores.reserve(ncores);
+ fincount.resize(ncores + 1);
+ worklist.reserve(nstates);
}
if (opts->posix_closure == POSIX_CLOSURE_GTOP) {
determ_context_t::~determ_context_t()
{
- delete[] dc_prectbl;
+ delete[] newprectbl;
}
typedef slab_allocator_t<1024 * 1024, sizeof(void*)> allocator_t;
+typedef int32_t prectable_t;
struct clos_t
uint32_t origin;
uint32_t tvers; // vector of tag versions (including lookahead tags)
hidx_t ttran; // history of transition tags
- hidx_t tlook; // history of lookahead tags
+ hidx_t thist; // history of lookahead tags
static inline bool fin(const clos_t &c) { return c.state->type == nfa_state_t::FIN; }
static inline bool ran(const clos_t &c) { return c.state->type == nfa_state_t::RAN; }
const prectable_t *prectbl;
nfa_state_t **state;
uint32_t *tvers; // tag versions
- hidx_t *tlook; // lookahead tags
+ hidx_t *thist; // lookahead tags
FORBID_COPY(kernel_t);
};
};
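+// level item used by compute_prectable(): one history per core configuration, with
+// an immutable part (core id, origin) and a mutable part (minimal height, tree index)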
+struct histleaf_t
+{
+ uint32_t coreid;
+ uint32_t origin;
+ int32_t hidx;
+ int32_t height;
+};
+
+
typedef lookup_t<const kernel_t*> kernels_t;
typedef std::priority_queue<nfa_state_t*, std::vector<nfa_state_t*>
, cmp_gtop_t> gtop_heap_t;
struct determ_context_t
{
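+ // configuration set types expected by the shared precedence code in posix_precedence.h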
+ typedef std::vector<clos_t> confset_t;
+ typedef confset_t::iterator confiter_t;
+ typedef confset_t::const_iterator cconfiter_t;
+ typedef confset_t::reverse_iterator rconfiter_t;
+ typedef confset_t::const_reverse_iterator rcconfiter_t;
+
// determinization input
const opt_t *dc_opts; // options
Msg &dc_msg; // error messages and warnings
const std::string &dc_condname; // the name of current condition (with -c)
- const nfa_t &dc_nfa; // TNFA
+ const nfa_t &nfa; // TNFA
// determinization output
- dfa_t &dc_dfa; // resulting TDFA
+ dfa_t &dfa; // resulting TDFA
// temporary structures used by determinization
allocator_t dc_allocator;
uint32_t dc_symbol; // alphabet symbol of the current transition
tcmd_t *dc_actions; // tag actions of the current transition
closure_t dc_reached;
- closure_t dc_closure;
- prectable_t *dc_prectbl; // precedence table for Okui POSIX disambiguation
+ closure_t state;
tagver_table_t dc_tagvertbl;
- tag_history_t dc_taghistory; // prefix trie of tag histories
+ tag_history_t history; // prefix trie of tag histories
kernels_t dc_kernels; // TDFA states under construction
kernel_buffers_t dc_buffers;
std::stack<clos_t> dc_stack_dfs; // stack used for DFS in leftmost greedy closure
tag_path_t dc_path3; // buffer 3 for tag history
std::vector<uint32_t> dc_tagcount; // buffer for counting sort on tag history
+ // precedence table and auxiliary data for POSIX disambiguation
+ int32_t *newprectbl;
+ const int32_t *oldprectbl;
+ size_t oldprecdim;
+ std::vector<histleaf_t> histlevel;
+ std::vector<uint32_t> sortcores;
+ std::vector<uint32_t> fincount;
+ std::vector<int32_t> worklist;
+
// debug
dump_dfa_t dc_dump;
closure_stats_t dc_clstats;
void tagged_epsilon_closure(determ_context_t &ctx);
void closure_posix(determ_context_t &);
void closure_leftmost(determ_context_t &);
-void orders(determ_context_t &);
void find_state(determ_context_t &ctx);
-int32_t precedence(determ_context_t &, const clos_t &, const clos_t &, int32_t &, int32_t &);
-int32_t unpack_longest(int32_t);
-int32_t unpack_leftmost(int32_t);
-int32_t pack(int32_t, int32_t);
bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const
{
return x->topord < y->topord;
}
-inline int32_t unpack_longest(int32_t packed)
-{
- // take lower 30 bits and sign-extend
- return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
-}
-
-inline int32_t unpack_leftmost(int32_t packed)
-{
- // take higher 2 bits and sign-extend
- return packed >> 30u;
-}
-
-inline int32_t pack(int32_t longest, int32_t leftmost)
-{
- // avoid signed overflows by using unsigned arithmetics
- uint32_t u_longest = static_cast<uint32_t>(longest);
- uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
-
- // leftmost: higher 2 bits, longest: lower 30 bits
- uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
- int32_t packed = static_cast<int32_t>(u_packed);
-
- DASSERT(unpack_longest(packed) == longest
- && unpack_leftmost(packed) == leftmost);
-
- return packed;
-}
-
} // namespace re2c
#endif // _RE2C_DFA_DETERMINIZATION_
void find_state(determ_context_t &ctx)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
// find or add the new state in the existing set of states
const bool is_new = do_find_state(ctx);
// check if the new state is final
// see note [at most one final item per closure]
cclositer_t
- b = ctx.dc_closure.begin(),
- e = ctx.dc_closure.end(),
+ b = ctx.state.begin(),
+ e = ctx.state.end(),
f = std::find_if(b, e, clos_t::fin);
if (f != e) {
t->tcmd[dfa.nchars] = final_actions(ctx, *f);
bool do_find_state(determ_context_t &ctx)
{
kernels_t &kernels = ctx.dc_kernels;
- const closure_t &closure = ctx.dc_closure;
+ const closure_t &closure = ctx.state;
// empty closure corresponds to default state
if (closure.size() == 0) {
kernel_t *k = ctx.dc_buffers.kernel;
// copy closure to buffer kernel
- copy_to_buffer_kernel(closure, ctx.dc_prectbl, k);
+ copy_to_buffer_kernel(closure, ctx.newprectbl, k);
// hash "static" part of the kernel
const uint32_t hash = hash_kernel(k);
tcmd_t *final_actions(determ_context_t &ctx, const clos_t &fin)
{
- dfa_t &dfa = ctx.dc_dfa;
+ dfa_t &dfa = ctx.dfa;
const Rule &rule = dfa.rules[fin.state->rule];
const tagver_t *vers = ctx.dc_tagvertbl[fin.tvers];
- const hidx_t look = fin.tlook;
- const tag_history_t &thist = ctx.dc_taghistory;
+ const hidx_t look = fin.thist;
+ const tag_history_t &thist = ctx.history;
tcpool_t &tcpool = dfa.tcpool;
tcmd_t *copy = NULL, *save = NULL, **p;
k->prectbl = NULL;
k->state = alc.alloct<nfa_state_t*>(size);
k->tvers = alc.alloct<uint32_t>(size);
- k->tlook = alc.alloct<hidx_t>(size);
+ k->thist = alc.alloct<hidx_t>(size);
return k;
}
memcpy(k->state, kernel->state, n * sizeof(void*));
memcpy(k->tvers, kernel->tvers, n * sizeof(uint32_t));
- memcpy(k->tlook, kernel->tlook, n * sizeof(hidx_t));
+ memcpy(k->thist, kernel->thist, n * sizeof(hidx_t));
prectable_t *ptbl = NULL;
if (kernel->prectbl) {
const clos_t &c = closure[i];
buffer->state[i] = c.state;
buffer->tvers[i] = c.tvers;
- buffer->tlook[i] = c.tlook;
+ buffer->thist[i] = c.thist;
}
}
{
kernel_buffers_t &kbufs = ctx.dc_buffers;
allocator_t &alc = ctx.dc_allocator;
- const tagver_t maxver = ctx.dc_dfa.maxtagver;
- const size_t nkern = ctx.dc_closure.size();
+ const tagver_t maxver = ctx.dfa.maxtagver;
+ const size_t nkern = ctx.state.size();
if (kbufs.maxsize < nkern) {
kbufs.maxsize = nkern * 2; // in advance
{
DASSERT(x->size == y->size);
- if (memcmp(x->tlook, y->tlook, x->size * sizeof(hidx_t)) == 0) {
+ if (memcmp(x->thist, y->thist, x->size * sizeof(hidx_t)) == 0) {
return true;
}
- tag_history_t &thist = ctx.dc_taghistory;
+ tag_history_t &thist = ctx.history;
tag_path_t &p1 = ctx.dc_path1, &p2 = ctx.dc_path2, &p3 = ctx.dc_path3;
std::vector<uint32_t> &count = ctx.dc_tagcount;
for (size_t i = 0; i < x->size; ++i) {
- const hidx_t xl = x->tlook[i], yl = y->tlook[i];
+ const hidx_t xl = x->thist[i], yl = y->thist[i];
if (xl == yl) continue;
&& equal_lookahead_tags(ctx, x, y);
if (!compatible) return false;
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
+ const std::vector<Tag> &tags = ctx.dfa.tags;
const size_t ntag = tags.size();
kernel_buffers_t &bufs = ctx.dc_buffers;
tagver_t *x2y = bufs.x2y, *y2x = bufs.y2x, max = bufs.max;
const tagver_t
*xvs = ctx.dc_tagvertbl[x->tvers[i]],
*yvs = ctx.dc_tagvertbl[y->tvers[i]];
- const hidx_t xl = x->tlook[i];
+ const hidx_t xl = x->thist[i];
for (size_t t = 0; t < ntag; ++t) {
// see note [mapping ignores items with lookahead tags]
if (!history(tags[t])
- && ctx.dc_taghistory.last(xl, t) != TAGVER_ZERO) continue;
+ && ctx.history.last(xl, t) != TAGVER_ZERO) continue;
const tagver_t xv = xvs[t], yv = yvs[t];
tagver_t &xv0 = y2x[yv], &yv0 = x2y[xv];
const tagver_t yv = x2y[xv], axv = abs(xv), ayv = abs(yv);
if (yv != TAGVER_ZERO && xv != yv && !fixed(tags[x2t[xv]])) {
DASSERT(axv != ayv);
- copy = ctx.dc_dfa.tcpool.make_copy(copy, axv, ayv);
+ copy = ctx.dfa.tcpool.make_copy(copy, axv, ayv);
}
}
+++ /dev/null
-#include <stdlib.h>
-
-#include "src/debug/debug.h"
-#include "src/dfa/determinization.h"
-#include "src/dfa/tag_history.h"
-
-
-namespace re2c
-{
-
-int32_t precedence(determ_context_t &ctx, const clos_t &x, const clos_t &y
- , int32_t &prec1, int32_t &prec2)
-{
- const int32_t idx1 = x.tlook, idx2 = y.tlook;
- const uint32_t orig1 = x.origin, orig2 = y.origin;
- int prec = 0;
-
- if (idx1 == idx2 && orig1 == orig2) {
- prec1 = prec2 = MAX_RHO;
- return prec;
- }
-
- const std::vector<Tag> &tags = ctx.dc_dfa.tags;
- tag_history_t &hist = ctx.dc_taghistory;
-
- const bool fork_frame = orig1 == orig2;
- if (fork_frame) {
- prec1 = prec2 = MAX_RHO;
- }
- else {
- const kernel_t *k = ctx.dc_kernels[ctx.dc_origin];
- prec = unpack_leftmost(k->prectbl[orig1 * k->size + orig2]);
- prec1 = unpack_longest(k->prectbl[orig1 * k->size + orig2]);
- prec2 = unpack_longest(k->prectbl[orig2 * k->size + orig1]);
- }
-
- tag_info_t info1, info2;
- int32_t i1 = idx1, i2 = idx2;
- for (; i1 != i2; ) {
- if (i1 > i2) {
- const tag_history_t::node_t &n = hist.node(i1);
- info1 = n.info;
- prec1 = std::min(prec1, tags[info1.idx].height);
- i1 = n.pred;
- }
- else {
- const tag_history_t::node_t &n = hist.node(i2);
- info2 = n.info;
- prec2 = std::min(prec2, tags[info2.idx].height);
- i2 = n.pred;
- }
- DINCCOUNT_CLLENGTH(ctx, 1);
- }
- if (i1 != HROOT) {
- DASSERT(fork_frame);
- const int32_t h = tags[hist.node(i1).info.idx].height;
- prec1 = std::min(prec1, h);
- prec2 = std::min(prec2, h);
- }
- DINCCOUNT_CLPREC(ctx);
-
- // longest precedence
- if (prec1 > prec2) return -1;
- if (prec1 < prec2) return 1;
-
- // leftmost precedence
- if (fork_frame) {
- // equal => not less
- if (i1 == idx1 && i2 == idx2) return 0;
-
- // shorter => less
- if (i1 == idx1) return -1;
- if (i2 == idx2) return 1;
-
- const uint32_t tag1 = info1.idx, tag2 = info2.idx;
- const bool neg1 = info1.neg, neg2 = info2.neg;
-
- // can't be both closing
- DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
-
- // closing vs opening: closing wins
- if (tag1 % 2 == 1) return -1;
- if (tag2 % 2 == 1) return 1;
-
- // can't be both negative
- DASSERT(!(neg1 && neg2));
-
- // positive vs negative: positive wins
- if (neg1) return 1;
- if (neg2) return -1;
-
- // positive vs positive: smaller wins
- // (this case is only possible because multiple
- // top-level RE don't have proper negative tags)
- if (tag1 < tag2) return -1;
- if (tag1 > tag2) return 1;
-
- DASSERT(false); // unreachable
- }
- return prec;
-}
-
-} // namespace re2c
--- /dev/null
+#ifndef _RE2C_DFA_POSIX_PRECEDENCE_
+#define _RE2C_DFA_POSIX_PRECEDENCE_
+
+#include "src/dfa/tag_history.h"
+#include "src/debug/debug.h"
+
+
+namespace re2c {
+
+inline int32_t unpack_longest(int32_t packed)
+{
+ // take lower 30 bits and sign-extend
+ return static_cast<int32_t>(static_cast<uint32_t>(packed) << 2u) >> 2u;
+}
+
+inline int32_t unpack_leftmost(int32_t packed)
+{
+ // take higher 2 bits and sign-extend
+ return packed >> 30u;
+}
+
+inline int32_t pack(int32_t longest, int32_t leftmost)
+{
+ // avoid signed overflows by using unsigned arithmetics
+ uint32_t u_longest = static_cast<uint32_t>(longest);
+ uint32_t u_leftmost = static_cast<uint32_t>(leftmost);
+
+ // leftmost: higher 2 bits, longest: lower 30 bits
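+ // e.g. pack(3, -1) gives bit pattern 0xC0000003: leftmost -1 in bits 31..30, longest 3 in bits 29..0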
+ uint32_t u_packed = (u_longest & 0x3fffFFFF) | (u_leftmost << 30u);
+ int32_t packed = static_cast<int32_t>(u_packed);
+
+ DASSERT(unpack_longest(packed) == longest
+ && unpack_leftmost(packed) == leftmost);
+
+ return packed;
+}
+
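+// leftmost comparison of two histories that fork in the same configuration:
+// negative if the 1st history takes precedence, positive if the 2nd does, zero if equal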
+inline int32_t leftprec(tag_info_t info1, tag_info_t info2, bool last1, bool last2)
+{
+ // equal => not less
+ if (last1 && last2) return 0;
+
+ // shorter => less
+ if (last1) return -1;
+ if (last2) return 1;
+
+ const uint32_t tag1 = info1.idx, tag2 = info2.idx;
+ const bool neg1 = info1.neg, neg2 = info2.neg;
+
+ // can't be both closing
+ DASSERT(!(tag1 % 2 == 1 && tag2 % 2 == 1));
+
+ // closing vs opening: closing wins
+ if (tag1 % 2 == 1) return -1;
+ if (tag2 % 2 == 1) return 1;
+
+ // can't be both negative
+ DASSERT(!(neg1 && neg2));
+
+ // positive vs negative: positive wins
+ if (neg1) return 1;
+ if (neg2) return -1;
+
+ // positive vs positive: smaller wins
+ // (this case is only possible because multiple
+ // top-level RE don't have proper negative tags)
+ if (tag1 < tag2) return -1;
+ if (tag1 > tag2) return 1;
+
+ DASSERT(false);
+ return 0;
+}
+
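+// compare configurations x and y: fill prec1/prec2 with their longest-precedence values
+// (minimal tag heights along their histories) and return negative if x takes precedence,
+// positive if y does, zero if they are equal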
+template<typename ctx_t, typename conf_t>
+int32_t precedence(ctx_t &ctx, const conf_t &x, const conf_t &y
+ , int32_t &prec1, int32_t &prec2)
+{
+ prec1 = prec2 = MAX_RHO;
+ int32_t prec = 0;
+
+ const int32_t idx1 = x.thist, idx2 = y.thist;
+ const uint32_t orig1 = x.origin, orig2 = y.origin;
+
+ if (idx1 == idx2 && orig1 == orig2) {
+ return 0;
+ }
+
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &hist = ctx.history;
+
+ const bool fork_frame = orig1 == orig2;
+ if (!fork_frame) {
+ prec = unpack_leftmost(ctx.oldprectbl[orig1 * ctx.oldprecdim + orig2]);
+ prec1 = unpack_longest(ctx.oldprectbl[orig1 * ctx.oldprecdim + orig2]);
+ prec2 = unpack_longest(ctx.oldprectbl[orig2 * ctx.oldprecdim + orig1]);
+ }
+
+ tag_info_t info1, info2;
+ int32_t i1 = idx1, i2 = idx2;
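+ // walk both histories up to their join point, tracking the minimal tag height on each path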
+ for (; i1 != i2; ) {
+ if (i1 > i2) {
+ const tag_history_t::node_t &n = hist.node(i1);
+ info1 = n.info;
+ prec1 = std::min(prec1, tags[info1.idx].height);
+ i1 = n.pred;
+ }
+ else {
+ const tag_history_t::node_t &n = hist.node(i2);
+ info2 = n.info;
+ prec2 = std::min(prec2, tags[info2.idx].height);
+ i2 = n.pred;
+ }
+ DINCCOUNT_CLLENGTH(ctx, 1);
+ }
+ if (i1 != HROOT) {
+ DASSERT(fork_frame);
+ const int32_t h = tags[hist.node(i1).info.idx].height;
+ prec1 = std::min(prec1, h);
+ prec2 = std::min(prec2, h);
+ }
+ DINCCOUNT_CLPREC(ctx);
+
+ // longest precedence
+ if (prec1 > prec2) return -1;
+ if (prec1 < prec2) return 1;
+
+ // leftmost precedence
+ return !fork_frame ? prec
+ : leftprec(info1, info2, i1 == idx1, i2 == idx2);
+}
+
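+// compute the precedence table for the current closure from the previous table
+// (oldprectbl of dimension oldprecdim) in a single traversal of the tag history tree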
+template<typename ctx_t>
+void compute_prectable(ctx_t &ctx)
+{
+ const typename ctx_t::confset_t &state = ctx.state;
+ const std::vector<Tag> &tags = ctx.nfa.tags;
+ tag_history_t &history = ctx.history;
+
+ const prectable_t *oldtbl = ctx.oldprectbl;
+ prectable_t *newtbl = ctx.newprectbl;
+ const size_t olddim = ctx.oldprecdim, newdim = state.size();
+
+ std::vector<uint32_t> &sortcores = ctx.sortcores;
+ std::vector<uint32_t> &fcount = ctx.fincount;
+ std::vector<int32_t> &stack = ctx.worklist;
+ std::vector<histleaf_t> &level = ctx.histlevel;
+ std::vector<histleaf_t>::reverse_iterator li, lj, lk, le;
+
+ level.clear();
+ level.reserve(newdim);
+ sortcores.resize(newdim);
+
+ // Group core configurations by their history tree index, so that later
+ // while traversing the tree we will know at once which configurations
+ // (if any) are bound to the given tree node. We use counting sort, which
+ // requires additional memory, but is fast and conveniently creates an
+ // array of boundaries in the sorted configuration array.
+ uint32_t maxfin = 0;
+ for (typename ctx_t::cconfiter_t c = state.begin(), e = state.end(); c != e; ++c) {
+ uint32_t &x = history.node1(c->thist).finidx;
+ if (x >= USED) {
+ x = maxfin++;
+ fcount[x] = 0;
+
+ // mark all nodes down to root as used (unless marked already)
+ for (int32_t i = history.node(c->thist).pred; i >= HROOT; ) {
+ uint32_t &y = history.node1(i).finidx;
+ if (y <= USED) break;
+ y = USED;
+ i = history.node(i).pred;
+ }
+ }
+ ++fcount[x];
+ }
+ fcount[maxfin] = 0;
+ for (size_t i = 1; i <= maxfin; ++i) {
+ fcount[i] += fcount[i - 1];
+ }
+ for (uint32_t i = static_cast<uint32_t>(newdim); i --> 0; ) {
+ sortcores[--fcount[history.node1(state[i].thist).finidx]] = i;
+ }
+
+ // Depth-first traversal of the history tree. During traversal we grow
+ // an array of items (one item per core configuration). Items are added
+ // in tree nodes that have core configurations associated with them.
+ // Each item represents one history. Items have immutable part (core ID,
+ // origin) and mutable part (current minimal height, current tree index)
+ // that changes as we return down the tree.
+ stack.push_back(0);
+ while (!stack.empty()) {
+ const int32_t n = stack.back();
+ tag_history_t::node1_t &node = history.node1(n);
+ const uint32_t fidx = node.finidx;
+
+ if (fidx == NONFIN) {
+ // aborted branch of search tree, don't waste time
+ stack.pop_back();
+ continue;
+ }
+
+ if (node.next != -1) {
+ // start or continue visiting subtrees rooted at this node
+ const tag_history_t::arc_t &arc = history.arc(node.next);
+ stack.push_back(arc.node);
+ node.next = arc.next;
+ continue;
+ }
+
+ // all subtrees visited, it's time to process this node
+ const int32_t h = n == 0 ? MAX_RHO : tags[history.node(n).info.idx].height;
+ li = level.rbegin();
+ le = level.rend();
+
+ if (fidx < USED) {
+ // this node has leaf configurations, add them to level
+ for (uint32_t k = fcount[fidx], e = fcount[fidx + 1]; k < e; ++k) {
+ const uint32_t j = sortcores[k];
+ const histleaf_t l = {j, state[j].origin, HROOT, h};
+ level.push_back(l);
+ }
+
+ // compute precedence for newly added configurations
+ const int32_t p0 = pack(h, 0);
+ for (lj = level.rbegin(); lj != li; ++lj) {
+ for (lk = lj; lk != li; ++lk) {
+ const uint32_t cj = lj->coreid, ck = lk->coreid;
+ const uint32_t oj = lj->origin, ok = lk->origin;
+ const bool fork = n != 0 || oj == ok;
+ if (fork) {
+ newtbl[cj * newdim + ck] = p0;
+ newtbl[ck * newdim + cj] = p0;
+ }
+ else {
+ newtbl[cj * newdim + ck] = oldtbl[oj * olddim + ok];
+ newtbl[ck * newdim + cj] = oldtbl[ok * olddim + oj];
+ }
+ }
+ }
+ }
+
+ // Each subtree appended a sequence of items to level. We can find
+ // sequence boundaries by looking at tree index of each item: it is
+ // equal to tree index of the corresponding subtree (except for the
+ // leaf items added at this node; but we know where they start).
+
+ // We must compute precedence for each pair of items from different
+ // sequences (including leaf items added at this node), but not within
+ // sequence boundaries: those histories fork higher up the subtree;
+ // their precedence has already been computed and must not be touched.
+
+ for (int32_t a = node.last; a != -1; ) {
+ const tag_history_t::arc_t &arc = history.arc(a);
+ a = arc.prev;
+
+ // for all the items of this subtree
+ for (lk = li; li != le && li->hidx == arc.node; ++li) {
+
+ // update height of each item coming from subtree
+ li->height = std::min(li->height, h);
+
+ // for all the level items to the right of this subtree
+ for (lj = level.rbegin(); lj != lk; ++lj) {
+
+ const uint32_t ci = li->coreid, cj = lj->coreid;
+ const uint32_t oi = li->origin, oj = lj->origin;
+ const bool fork = n != 0 || oi == oj;
+ int32_t p1 = li->height, p2 = lj->height, p;
+
+ if (!fork) {
+ p1 = std::min(p1, unpack_longest(oldtbl[oi * olddim + oj]));
+ p2 = std::min(p2, unpack_longest(oldtbl[oj * olddim + oi]));
+ }
+
+ if (p1 > p2) {
+ p = -1;
+ }
+ else if (p1 < p2) {
+ p = 1;
+ }
+ else if (fork) {
+ const tag_info_t t1 = history.node(li->hidx).info;
+ const tag_info_t t2 = history.node(lj->hidx).info;
+ p = leftprec(t1, t2, t1 == NOINFO, t2 == NOINFO);
+ }
+ else {
+ p = unpack_leftmost(oldtbl[oi * olddim + oj]);
+ }
+
+ newtbl[ci * newdim + cj] = pack(p1, p);
+ newtbl[cj * newdim + ci] = pack(p2, -p);
+ }
+ }
+ }
+
+ // finally, downgrade tree index of all subtree items, making their
+ // origins indistinguishable from each other for the previous level
+ for (lj = level.rbegin(); lj != li; ++lj) {
+ lj->hidx = n;
+ }
+
+ stack.pop_back();
+ }
+}
+
+} // namespace re2c
+
+#endif // _RE2C_DFA_POSIX_PRECEDENCE_
{
typedef int32_t hidx_t;
-typedef int32_t prectable_t;
typedef std::vector<tag_info_t> tag_path_t;
static const hidx_t HROOT = 0;