dfa_t *dfa = NULL;
if (cflags & REG_NFA) {
- preg->simctx = new libre2c::simctx_t(*nfa, preg->re_nsub, cflags);
+ if ((cflags & REG_TRIE) && (cflags & REG_LEFTMOST)) {
+ preg->simctx = new libre2c::lzctx_t(*nfa, preg->re_nsub, cflags);
+ }
+ else if (cflags & REG_TRIE) {
+ preg->simctx = new libre2c::pzctx_t(*nfa, preg->re_nsub, cflags);
+ }
+ else if (cflags & REG_LEFTMOST) {
+ preg->simctx = new libre2c::lctx_t(*nfa, preg->re_nsub, cflags);
+ }
+ else {
+ preg->simctx = new libre2c::pctx_t(*nfa, preg->re_nsub, cflags);
+ }
}
else {
preg->char2class = new size_t[256];
struct RangeMgr;
} // namespace re2c
-namespace re2c {
-namespace libre2c {
-struct simctx_t;
-} // namespace libre2c
-} // namespace re2c
-
typedef ptrdiff_t regoff_t;
struct regmatch_t
regoff_t *regs;
size_t *char2class;
int flags;
- re2c::libre2c::simctx_t *simctx;
+ void *simctx;
};
static const int REG_NOMATCH = INT_MAX;
typedef confset_t::const_iterator cconfiter_t;
typedef confset_t::const_reverse_iterator rcconfiter_t;
+// Disambiguation policy of a simulation context. LEFTMOST contexts are the
+// ones created when REG_LEFTMOST is set; POSIX contexts are created otherwise.
+enum sema_t {POSIX, LEFTMOST};
+// Evaluation mode of a simulation context. LAZY contexts are the ones created
+// when REG_TRIE is set; STRICT contexts are created otherwise.
+enum eval_t {STRICT, LAZY};
+
+// Maps a (SEMA, EVAL) pair to the tag history type stored in the matching
+// simctx_t. Every combination currently resolves to tag_history_t; presumably
+// the indirection exists so that individual modes can later switch to their
+// own history type -- TODO confirm.
+template<sema_t SEMA, eval_t EVAL> struct history_type_t;
+template<> struct history_type_t<POSIX, STRICT> {typedef tag_history_t type;};
+template<> struct history_type_t<LEFTMOST, STRICT> {typedef tag_history_t type;};
+template<sema_t SEMA> struct history_type_t<SEMA, LAZY> {typedef tag_history_t type;};
+
+// Simulation context used by the regexec_nfa_* matchers, parameterized by
+// disambiguation policy (SEMA) and evaluation mode (EVAL).
+template<sema_t SEMA, eval_t EVAL>
struct simctx_t
{
typedef libre2c::conf_t conf_t;
typedef confset_t::const_iterator cconfiter_t;
typedef confset_t::reverse_iterator rconfiter_t;
typedef confset_t::const_reverse_iterator rcconfiter_t;
+ // Tag history type selected for this (SEMA, EVAL) combination.
+ typedef typename history_type_t<SEMA, EVAL>::type history_t;
const nfa_t &nfa;
+ // Number of submatch offset slots; the constructor sets it to
+ // 2 * (re_nsub - 1).
const size_t nsub;
const int flags;
- tag_history_t history;
+ history_t history;
+ // Index into history; reset to HROOT by the constructor and by init().
int32_t hidx;
uint32_t step;
FORBID_COPY(simctx_t);
};
-void init(simctx_t &ctx, const char *string);
-int finalize(const simctx_t &ctx, const char *string, size_t nmatch, regmatch_t pmatch[]);
+// Concrete simulation contexts, one per matcher:
+//   pctx_t  -- used by regexec_nfa_posix
+//   lctx_t  -- used by regexec_nfa_leftmost
+//   pzctx_t -- used by regexec_nfa_posix_trie
+//   lzctx_t -- used by regexec_nfa_leftmost_trie
+// regex_t stores the context as void*, so it must be created, cast back and
+// deleted with the same typedef for a given combination of flags.
+typedef simctx_t<POSIX, STRICT> pctx_t;
+typedef simctx_t<LEFTMOST, STRICT> lctx_t;
+typedef simctx_t<POSIX, LAZY> pzctx_t;
+typedef simctx_t<LEFTMOST, LAZY> lzctx_t;
+
int regexec_dfa(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
int regexec_nfa_posix(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
int regexec_nfa_posix_trie(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
int regexec_nfa_leftmost(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+// Builds a simulation context for the given NFA and preallocates the buffers
+// required by the matching algorithm selected through flags.
+//
+// nfa     -- automaton to simulate; stored by reference.
+// re_nsub -- submatch count taken from the compiled regex.
+// flags   -- compilation flags (REG_TRIE, REG_LEFTMOST, REG_GTOP are tested).
+//
+// NOTE(review): the arrays below are owned through raw pointers, so if one of
+// the later new[] expressions throws, the earlier arrays are not released
+// (the destructor does not run for a partially constructed object).
+template<sema_t SEMA, eval_t EVAL>
+simctx_t<SEMA, EVAL>::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags)
+ : nfa(nfa)
+ // Two offset slots per submatch, excluding one submatch.
+ // NOTE(review): wraps around if re_nsub == 0; presumably re_nsub always
+ // counts the whole-match group and is >= 1 -- confirm against the caller.
+ , nsub(2 * (re_nsub - 1))
+ , flags(flags)
+ , history()
+ , hidx(HROOT)
+ , step(0)
+ , rule(Rule::NONE)
+ , cursor(NULL)
+ , marker(NULL)
+ , offsets1(NULL)
+ , offsets2(NULL)
+ , offsets3(NULL)
+ , done(NULL)
+ , newprectbl(NULL)
+ , oldprectbl(NULL)
+ , oldprecdim(0)
+ , histlevel()
+ , sortcores()
+ , fincount()
+ , worklist()
+ , cache()
+ , reach()
+ , state()
+ , gor1_topsort()
+ , gor1_linear()
+ , gtop_heap_storage()
+ , gtop_cmp()
+ , gtop_heap(gtop_cmp, gtop_heap_storage)
+ , dc_clstats()
+{
+ const size_t
+ nstates = nfa.size,
+ ncores = nfa.ncores;
+
+ state.reserve(nstates);
+ reach.reserve(nstates);
+
+ // One flag per offset slot; allocated for every mode.
+ done = new bool[nsub];
+
+ // Offset tables are allocated only when REG_TRIE is not set.
+ if (!(flags & REG_TRIE)) {
+ offsets1 = new regoff_t[nsub * ncores];
+ offsets2 = new regoff_t[nsub * ncores];
+ offsets3 = new regoff_t[nsub];
+ }
+ // Precedence tables and their helper containers are allocated only when
+ // neither REG_LEFTMOST nor REG_TRIE is set.
+ if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
+ newprectbl = new int32_t[ncores * ncores];
+ oldprectbl = new int32_t[ncores * ncores];
+ histlevel.reserve(ncores);
+ sortcores.reserve(ncores);
+ fincount.resize(ncores + 1);
+ worklist.reserve(nstates);
+ }
+
+ // Reserve storage only for the closure algorithm selected by REG_GTOP.
+ if (flags & REG_GTOP) {
+ gtop_heap_storage.reserve(nstates);
+ }
+ else {
+ gor1_topsort.reserve(nstates);
+ gor1_linear.reserve(nstates);
+ }
+}
+
+// Releases the arrays allocated by the constructor. The flag tests mirror the
+// conditions under which the constructor allocated each group of arrays.
+template<sema_t SEMA, eval_t EVAL>
+simctx_t<SEMA, EVAL>::~simctx_t()
+{
+    const bool trie = (flags & REG_TRIE) != 0;
+    const bool leftmost = (flags & REG_LEFTMOST) != 0;
+
+    if (!trie) {
+        // Offset tables exist only when REG_TRIE is not set.
+        delete[] offsets1;
+        delete[] offsets2;
+        delete[] offsets3;
+
+        if (!leftmost) {
+            // Precedence tables additionally require REG_LEFTMOST unset.
+            delete[] newprectbl;
+            delete[] oldprectbl;
+        }
+    }
+
+    // The slot flags are allocated in every mode.
+    delete[] done;
+}
+
+// Resets a simulation context so that it can be used to match a new input.
+//
+// ctx    -- context to reset.
+// string -- start of the input; both cursor and marker are pointed at it.
+template<sema_t SEMA, eval_t EVAL>
+void init(simctx_t<SEMA, EVAL> &ctx, const char *string)
+{
+ ctx.reach.clear();
+ ctx.state.clear();
+ ctx.history.init();
+ ctx.hidx = HROOT;
+ ctx.step = 0;
+ ctx.rule = Rule::NONE;
+ ctx.cursor = ctx.marker = string;
+ ctx.cache.clear();
+ ctx.histlevel.clear();
+ ctx.sortcores.clear();
+ // These containers are not cleared here, only checked: they are expected
+ // to be empty already (presumably drained by the previous run -- confirm).
+ DASSERT(ctx.worklist.empty());
+ DASSERT(ctx.gor1_topsort.empty());
+ DASSERT(ctx.gor1_linear.empty());
+ DASSERT(ctx.gtop_heap.empty());
+}
+
+// Converts the result of a LAZY simulation into POSIX-style submatch offsets.
+//
+// ctx    -- finished simulation context; ctx.rule tells whether a match was
+//           found and ctx.hidx is the head of the tag history to unwind.
+// string -- start of the matched input.
+// nmatch -- number of elements in pmatch; if zero, pmatch is not accessed.
+// pmatch -- output array: pmatch[0] receives the whole match, pmatch[k]
+//           (k >= 1) receives the offsets recorded for capture k - 1.
+//
+// Returns REG_NOMATCH if no rule matched, 0 otherwise.
+template<sema_t SEMA>
+int finalize(const simctx_t<SEMA, LAZY> &ctx, const char *string, size_t nmatch,
+    regmatch_t pmatch[])
+{
+    if (ctx.rule == Rule::NONE) {
+        return REG_NOMATCH;
+    }
+
+    // Nothing to report: the caller provided no room for offsets, so even
+    // pmatch[0] must not be written.
+    if (nmatch == 0) {
+        return 0;
+    }
+
+    regmatch_t *m = pmatch;
+    m->rm_so = 0;
+    // NOTE(review): assumes marker sits one position past the end of the
+    // match -- confirm against the matcher loop.
+    m->rm_eo = ctx.marker - string - 1;
+
+    const std::vector<Tag> &tags = ctx.nfa.tags;
+
+    // pmatch[0] is already filled, so only nmatch - 1 entries (two offsets
+    // each) remain. A tag with capture index t is stored at pmatch[t / 2 + 1],
+    // hence t must stay below this limit to remain inside the array.
+    const size_t limit = (nmatch - 1) * 2;
+    size_t todo = limit;
+
+    bool *done = ctx.done;
+    memset(done, 0, ctx.nsub * sizeof(bool));
+
+    // Walk the history from the most recent tag backwards; the first value
+    // seen for each offset slot wins, later (older) ones are skipped.
+    for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) {
+        const tag_history_t::node_t &n = ctx.history.node(i);
+        const Tag &tag = tags[n.info.idx];
+        const size_t t = tag.ncap;
+        if (!fictive(tag) && t < limit && !done[t]) {
+            done[t] = true;
+            --todo;
+            // Negative tags yield offset -1.
+            const regoff_t off = n.info.neg ? -1
+                : static_cast<regoff_t>(ctx.history.node2(i).step);
+            m = &pmatch[t / 2 + 1];
+            // Even capture indices are opening offsets, odd ones are closing.
+            if (t % 2 == 0) {
+                m->rm_so = off;
+            }
+            else {
+                m->rm_eo = off;
+            }
+        }
+        i = n.pred;
+    }
+
+    return 0;
+}
+
bool ran_or_fin_t::operator()(const conf_t &c)
{
switch (c.state->type) {
}
}
-namespace re2c {
-namespace libre2c {
-
-int finalize(const simctx_t &ctx, const char *string, size_t nmatch,
- regmatch_t pmatch[])
-{
- if (ctx.rule == Rule::NONE) {
- return REG_NOMATCH;
- }
-
- regmatch_t *m = pmatch;
- m->rm_so = 0;
- m->rm_eo = ctx.marker - string - 1;
-
- const std::vector<Tag> &tags = ctx.nfa.tags;
- size_t todo = nmatch * 2;
- bool *done = ctx.done;
- memset(done, 0, ctx.nsub * sizeof(bool));
-
- for (int32_t i = ctx.hidx; todo > 0 && i != HROOT; ) {
- const tag_history_t::node_t &n = ctx.history.node(i);
- const Tag &tag = tags[n.info.idx];
- const size_t t = tag.ncap;
- if (!fictive(tag) && t < nmatch * 2 && !done[t]) {
- done[t] = true;
- --todo;
- const regoff_t off = n.info.neg ? -1 : static_cast<regoff_t>(ctx.history.node2(i).step);
- m = &pmatch[t / 2 + 1];
- if (t % 2 == 0) {
- m->rm_so = off;
- }
- else {
- m->rm_eo = off;
- }
- }
- i = n.pred;
- }
-
- return 0;
-}
-
-simctx_t::simctx_t(const nfa_t &nfa, size_t re_nsub, int flags)
- : nfa(nfa)
- , nsub(2 * (re_nsub - 1))
- , flags(flags)
- , history()
- , hidx(HROOT)
- , step(0)
- , rule(Rule::NONE)
- , cursor(NULL)
- , marker(NULL)
- , offsets1(NULL)
- , offsets2(NULL)
- , offsets3(NULL)
- , done(NULL)
- , newprectbl(NULL)
- , oldprectbl(NULL)
- , oldprecdim(0)
- , histlevel()
- , sortcores()
- , fincount()
- , worklist()
- , cache()
- , reach()
- , state()
- , gor1_topsort()
- , gor1_linear()
- , gtop_heap_storage()
- , gtop_cmp()
- , gtop_heap(gtop_cmp, gtop_heap_storage)
- , dc_clstats()
-{
- const size_t
- nstates = nfa.size,
- ncores = nfa.ncores;
-
- state.reserve(nstates);
- reach.reserve(nstates);
-
- done = new bool[nsub];
-
- if (!(flags & REG_TRIE)) {
- offsets1 = new regoff_t[nsub * ncores];
- offsets2 = new regoff_t[nsub * ncores];
- offsets3 = new regoff_t[nsub];
- }
- if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- newprectbl = new int32_t[ncores * ncores];
- oldprectbl = new int32_t[ncores * ncores];
- histlevel.reserve(ncores);
- sortcores.reserve(ncores);
- fincount.resize(ncores + 1);
- worklist.reserve(nstates);
- }
-
- if (flags & REG_GTOP) {
- gtop_heap_storage.reserve(nstates);
- }
- else {
- gor1_topsort.reserve(nstates);
- gor1_linear.reserve(nstates);
- }
-}
-
-simctx_t::~simctx_t()
-{
- delete[] done;
- if (!(flags & REG_TRIE)) {
- delete[] offsets1;
- delete[] offsets2;
- delete[] offsets3;
- }
- if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) {
- delete[] newprectbl;
- delete[] oldprectbl;
- }
-}
-
-void init(simctx_t &ctx, const char *string)
-{
- ctx.reach.clear();
- ctx.state.clear();
- ctx.history.init();
- ctx.hidx = HROOT;
- ctx.step = 0;
- ctx.rule = Rule::NONE;
- ctx.cursor = ctx.marker = string;
- ctx.cache.clear();
- ctx.histlevel.clear();
- ctx.sortcores.clear();
- DASSERT(ctx.worklist.empty());
- DASSERT(ctx.gor1_topsort.empty());
- DASSERT(ctx.gor1_linear.empty());
- DASSERT(ctx.gtop_heap.empty());
-}
-
-} // namespace libre2c
-} // namespace re2c
namespace re2c {
namespace libre2c {
-static void reach_on_symbol(simctx_t &, uint32_t);
-static void closure_leftmost(simctx_t &);
-static void update_offsets(simctx_t &ctx, const conf_t &c);
+static void reach_on_symbol(lctx_t &, uint32_t);
+static void closure_leftmost(lctx_t &);
+static void update_offsets(lctx_t &ctx, const conf_t &c);
int regexec_nfa_leftmost(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- simctx_t &ctx = *preg->simctx;
+ lctx_t &ctx = *static_cast<lctx_t*>(preg->simctx);
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
return 0;
}
-void reach_on_symbol(simctx_t &ctx, uint32_t sym)
+void reach_on_symbol(lctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
ctx.history.init();
}
-void closure_leftmost(simctx_t &ctx)
+void closure_leftmost(lctx_t &ctx)
{
confset_t &state = ctx.state, &wl = ctx.reach;
state.clear();
}
}
-void update_offsets(simctx_t &ctx, const conf_t &c)
+void update_offsets(lctx_t &ctx, const conf_t &c)
{
const size_t nsub = ctx.nsub;
bool *done = ctx.done;
namespace re2c {
namespace libre2c {
-static void reach_on_symbol(simctx_t &, uint32_t);
-static void closure_leftmost(simctx_t &);
+static void reach_on_symbol(lzctx_t &, uint32_t);
+static void closure_leftmost(lzctx_t &);
int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- simctx_t &ctx = *preg->simctx;
+ lzctx_t &ctx = *static_cast<lzctx_t*>(preg->simctx);
init(ctx, string);
nfa_state_t *s0 = ctx.nfa.root;
return finalize(ctx, string, nmatch, pmatch);
}
-void reach_on_symbol(simctx_t &ctx, uint32_t sym)
+void reach_on_symbol(lzctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
}
}
-void closure_leftmost(simctx_t &ctx)
+void closure_leftmost(lzctx_t &ctx)
{
confset_t &state = ctx.state, &wl = ctx.reach;
state.clear();
namespace re2c {
namespace libre2c {
-static void make_one_step(simctx_t &, uint32_t);
-static void make_final_step(simctx_t &);
-static void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id);
-static void compute_prectbl_naive(simctx_t &ctx);
+static void make_one_step(pctx_t &, uint32_t);
+static void make_final_step(pctx_t &);
+static void update_offsets(pctx_t &ctx, const conf_t &c, uint32_t id);
+static void compute_prectbl_naive(pctx_t &ctx);
// we *do* want these to be inlined
-static inline void closure_posix(simctx_t &ctx);
+static inline void closure_posix(pctx_t &ctx);
int regexec_nfa_posix(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int /* eflags */)
{
- simctx_t &ctx = *preg->simctx;
+ pctx_t &ctx = *static_cast<pctx_t*>(preg->simctx);
init(ctx, string);
// root state can be non-core, so we pass zero as origin to avoid checks
return 0;
}
-void closure_posix(simctx_t &ctx)
+void closure_posix(pctx_t &ctx)
{
ctx.history.detach();
}
}
-void make_one_step(simctx_t &ctx, uint32_t sym)
+void make_one_step(pctx_t &ctx, uint32_t sym)
{
confset_t &state = ctx.state, &reach = ctx.reach;
uint32_t j = 0;
++ctx.step;
}
-void make_final_step(simctx_t &ctx)
+void make_final_step(pctx_t &ctx)
{
for (cconfiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
nfa_state_t *s = i->state;
}
}
-void update_offsets(simctx_t &ctx, const conf_t &c, uint32_t id)
+void update_offsets(pctx_t &ctx, const conf_t &c, uint32_t id)
{
const size_t nsub = ctx.nsub;
regoff_t *o;
// Old naive algorithm that has cubic complexity in the size of TNFA.
// Example that exhibits cubic behaviour is ((a?){1,N})*. In this example
// closure has O(N) states, and the compared histories have O(N) length.
-void compute_prectbl_naive(simctx_t &ctx)
+void compute_prectbl_naive(pctx_t &ctx)
{
const confset_t &state = ctx.state;
int32_t *newtbl = ctx.newprectbl;
* tag values (instead of storing tags in registers at each step).
*/
-static void make_step(simctx_t &, uint32_t);
-static void make_final_step(simctx_t &);
-static void closure_posix(simctx_t &);
-static int32_t precedence(simctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
-static int32_t precedence_(simctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
+static void make_step(pzctx_t &, uint32_t);
+static void make_final_step(pzctx_t &);
+static void closure_posix(pzctx_t &);
+static int32_t precedence(pzctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
+static int32_t precedence_(pzctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy);
// we *do* want this to be inlined
-static inline void relax(simctx_t &, const conf_t &);
+static inline void relax(pzctx_t &, const conf_t &);
static inline uint32_t get_step(const tag_history_t &hist, int32_t idx);
static inline uint32_t get_orig(const tag_history_t &hist, int32_t idx);
int regexec_nfa_posix_trie(const regex_t *preg, const char *string
, size_t nmatch, regmatch_t pmatch[], int)
{
- simctx_t &ctx = *preg->simctx;
+ pzctx_t &ctx = *static_cast<pzctx_t*>(preg->simctx);
init(ctx, string);
nfa_state_t *s0 = ctx.nfa.root;
return finalize(ctx, string, nmatch, pmatch);
}
-void make_step(simctx_t &ctx, uint32_t sym)
+void make_step(pzctx_t &ctx, uint32_t sym)
{
const confset_t &state = ctx.state;
confset_t &reach = ctx.reach;
++ctx.step;
}
-void make_final_step(simctx_t &ctx)
+void make_final_step(pzctx_t &ctx)
{
for (confiter_t i = ctx.state.begin(), e = ctx.state.end(); i != e; ++i) {
nfa_state_t *s = i->state;
}
}
-void closure_posix(simctx_t &ctx)
+void closure_posix(pzctx_t &ctx)
{
const confset_t &reach = ctx.reach;
confset_t &state = ctx.state;
}
}
-void relax(simctx_t &ctx, const conf_t &c)
+void relax(pzctx_t &ctx, const conf_t &c)
{
confset_t &state = ctx.state;
nfa_state_t *q = c.state;
}
}
-int32_t precedence(simctx_t &ctx, int32_t idx1, int32_t idx2
+int32_t precedence(pzctx_t &ctx, int32_t idx1, int32_t idx2
, int32_t &prec1, int32_t &prec2)
{
int32_t prec = 0;
return prec;
}
-int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2
+int32_t precedence_(pzctx_t &ctx, int32_t idx1, int32_t idx2
, int32_t &prec1, int32_t &prec2)
{
if (idx1 == idx2) {
delete[] preg->pmatch;
if (preg->flags & REG_NFA) {
- delete preg->simctx;
+ if ((preg->flags & REG_TRIE) && (preg->flags & REG_LEFTMOST)) {
+ delete static_cast<libre2c::lzctx_t*>(preg->simctx);
+ }
+ else if (preg->flags & REG_TRIE) {
+ delete static_cast<libre2c::pzctx_t*>(preg->simctx);
+ }
+ else if (preg->flags & REG_LEFTMOST) {
+ delete static_cast<libre2c::lctx_t*>(preg->simctx);
+ }
+ else {
+ delete static_cast<libre2c::pctx_t*>(preg->simctx);
+ }
}
else {
delete[] preg->regs;