libre2c_posix_la_HDR = \
libre2c_posix/lex.h \
libre2c_posix/regex.h \
+ libre2c_posix/regex_impl.h \
src/codegen/bitmap.h \
src/codegen/emit.h \
src/codegen/go.h \
libre2c_posix_la_SRC = \
libre2c_posix/regcomp.cc \
libre2c_posix/regexec.cc \
+ libre2c_posix/regexec_dfa.cc \
+ libre2c_posix/regexec_nfa.cc \
libre2c_posix/regfree.cc \
libre2c_posix/stubs.cc \
src/parse/ast.cc \
# lexer depends on bison-generated header
libre2c_posix/lex.cc: libre2c_posix/parse.cc
-libre2c_posix_la_test_SOURCES = libre2c_posix/test.cpp
-libre2c_posix_la_test_LDADD = libre2c_posix.la
-check_PROGRAMS += libre2c_posix_la_test
+test_libre2c_posix_SOURCES = libre2c_posix/test.cpp
+test_libre2c_posix_LDADD = libre2c_posix.la
+check_PROGRAMS += test_libre2c_posix
using namespace re2c;
-int regcomp(regex_t *preg, const char *pattern, int /* cflags */)
+int regcomp(regex_t *preg, const char *pattern, int cflags)
{
conopt_t globopts;
globopts.FFlag = true;
Warn warn;
+ preg->flags = cflags;
+
const AST *a = parse(pattern);
preg->rmgr = new RangeMgr;
arv.push_back(ar);
RESpec re(arv, opt, warn, *preg->rmgr);
- preg->char2class = new size_t[256];
- split_charset(re);
- for (uint32_t i = 1, j = 0; i < re.charset.size(); ++i) {
- for (; j < re.charset[i]; ++j) {
- preg->char2class[j] = i - 1;
- }
- }
-
find_fixed_tags(re);
insert_default_tags(re);
nfa_t *nfa = new nfa_t(re);
- dfa_t *dfa = new dfa_t(*nfa, opt, "", warn);
+ DASSERT(nfa->rules.size() == 1);
+ preg->re_nsub = nfa->rules[0].ncap + 1;
+ preg->pmatch = new regmatch_t[preg->re_nsub];
+
+ dfa_t *dfa = NULL;
+ if (cflags & REG_NFA) {
+ const size_t sz = 2 * nfa->size * nfa->size;
+ preg->prec_buf1 = new int32_t[sz];
+ preg->prec_buf2 = new int32_t[sz];
+ }
+ else {
+ preg->char2class = new size_t[256];
+ split_charset(re);
+ for (uint32_t i = 1, j = 0; i < re.charset.size(); ++i) {
+ for (; j < re.charset[i]; ++j) {
+ preg->char2class[j] = i - 1;
+ }
+ }
- compact_and_optimize_tags(opt, *dfa);
+ dfa = new dfa_t(*nfa, opt, "", warn);
+
+ compact_and_optimize_tags(opt, *dfa);
+
+ preg->regs = new regoff_t[dfa->maxtagver + 1];
+ }
- DASSERT(dfa->rules.size() == 1);
- preg->re_nsub = dfa->rules[0].ncap + 1;
- preg->pmatch = new regmatch_t[preg->re_nsub];
- preg->regs = new regoff_t[dfa->maxtagver + 1];
preg->nfa = nfa;
preg->dfa = dfa;
regoff_t rm_eo;
};
+// standard flags
+static const int REG_EXTENDED = 1u << 0;
+static const int REG_ICASE = 1u << 1;
+static const int REG_NOSUB = 1u << 2;
+static const int REG_NEWLINE = 1u << 3;
+static const int REG_NOTBOL = 1u << 4;
+static const int REG_NOTEOL = 1u << 5;
+// extensions
+static const int REG_NFA = 1u << 6;
+
+
struct regex_t
{
size_t re_nsub;
regmatch_t *pmatch;
regoff_t *regs;
size_t *char2class;
+ int *prec_buf1;
+ int *prec_buf2;
+ int flags;
};
static const int REG_NOMATCH = INT_MAX;
int regcomp(regex_t *preg, const char *pattern, int cflags);
-
size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size);
-
int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
-
void regfree(regex_t *preg);
#endif // _RE2C_LIB_REGEX_
--- /dev/null
+#ifndef _RE2C_LIB_REGEX_IMPL_
+#define _RE2C_LIB_REGEX_IMPL_
+
+#include "regex.h"
+#include <stddef.h>
+
+
+namespace re2c {
+
+int regexec_dfa(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+int regexec_nfa(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+
+} // namespace re2c
+
+#endif // _RE2C_LIB_REGEX_IMPL_
-#include <assert.h>
-#include <stdio.h>
-
-#include "libre2c_posix/lex.h"
#include "libre2c_posix/regex.h"
-#include "src/options/opt.h"
-#include "src/options/warn.h"
-#include "src/nfa/nfa.h"
-#include "src/debug/debug.h"
-#include "src/dfa/dfa.h"
+#include "libre2c_posix/regex_impl.h"
using namespace re2c;
-static void apply_regops(regoff_t *regs, const tcmd_t *cmd, regoff_t pos)
-{
- for (const tcmd_t *p = cmd; p; p = p->next) {
- if (tcmd_t::iscopy(p)) {
- regs[p->lhs] = regs[p->rhs];
- }
- else {
- DASSERT (tcmd_t::isset(p));
- regs[p->lhs] = *p->history == TAGVER_BOTTOM ? -1 : pos;
- }
- }
-}
-
int regexec(const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int /* eflags */)
+ regmatch_t pmatch[], int eflags)
{
- const dfa_t *dfa = preg->dfa;
- int result = REG_NOMATCH;
- regoff_t *regs = preg->regs;
- size_t i = 0;
- const char *p = string, *q = p;
- const dfa_state_t *s, *x = NULL;
-
- apply_regops(regs, dfa->tcmd0, 0);
-
- for (;;) {
- s = dfa->states[i];
- const int32_t c = *p++;
- const size_t j = preg->char2class[c];
- i = s->arcs[j];
-
- if (s->rule != Rule::NONE) {
- q = p;
- x = s;
- }
-
- if (i == dfa_t::NIL || c == 0) break;
-
- apply_regops(regs, s->tcmd[j], p - string - 1);
- }
-
- if (s->rule == Rule::NONE && x != NULL) {
- s = x;
- p = q;
- }
-
- if (s->rule != Rule::NONE) {
- regmatch_t *m = pmatch;
- result = 0;
- const regoff_t mlen = p - string - 1;
-
- apply_regops(regs, s->tcmd[dfa->nchars], mlen);
-
- m->rm_so = 0;
- m->rm_eo = mlen;
- ++m;
-
- const Rule &rule = dfa->rules[0];
- for (size_t t = rule.ltag; t < rule.htag; ++t) {
-
- const Tag &tag = dfa->tags[t];
- if (fictive(tag)) continue;
- if (tag.ncap >= nmatch * 2) break;
-
- regoff_t off;
- if (!fixed(tag)) {
- off = regs[dfa->finvers[t]];
- }
- else {
- off = tag.base == Tag::RIGHTMOST
- ? mlen : regs[dfa->finvers[tag.base]];
- DASSERT (off != -1);
- off -= static_cast<regoff_t>(tag.dist);
- }
-
- if (tag.ncap % 2 == 0) {
- m->rm_so = off;
- }
- else {
- m->rm_eo = off;
- ++m;
- }
- }
- }
-
- return result;
+ return (preg->flags & REG_NFA)
+ ? regexec_nfa(preg, string, nmatch, pmatch, eflags)
+ : regexec_dfa(preg, string, nmatch, pmatch, eflags);
}
+
--- /dev/null
+#include "libre2c_posix/lex.h"
+#include "libre2c_posix/regex.h"
+#include "libre2c_posix/regex_impl.h"
+#include "src/options/opt.h"
+#include "src/options/warn.h"
+#include "src/debug/debug.h"
+#include "src/dfa/dfa.h"
+
+
+namespace re2c {
+
+static void apply_regops(regoff_t *regs, const tcmd_t *cmd, regoff_t pos)
+{
+ for (const tcmd_t *p = cmd; p; p = p->next) {
+ if (tcmd_t::iscopy(p)) {
+ regs[p->lhs] = regs[p->rhs];
+ }
+ else {
+ DASSERT (tcmd_t::isset(p));
+ regs[p->lhs] = *p->history == TAGVER_BOTTOM ? -1 : pos;
+ }
+ }
+}
+
+int regexec_dfa(const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int /* eflags */)
+{
+ const dfa_t *dfa = preg->dfa;
+ int result = REG_NOMATCH;
+ regoff_t *regs = preg->regs;
+ size_t i = 0;
+ const char *p = string, *q = p;
+ const dfa_state_t *s, *x = NULL;
+
+ apply_regops(regs, dfa->tcmd0, 0);
+
+ for (;;) {
+ s = dfa->states[i];
+ const int32_t c = *p++;
+ const size_t j = preg->char2class[c];
+ i = s->arcs[j];
+
+ if (s->rule != Rule::NONE) {
+ q = p;
+ x = s;
+ }
+
+ if (i == dfa_t::NIL || c == 0) break;
+
+ apply_regops(regs, s->tcmd[j], p - string - 1);
+ }
+
+ if (s->rule == Rule::NONE && x != NULL) {
+ s = x;
+ p = q;
+ }
+
+ if (s->rule != Rule::NONE) {
+ regmatch_t *m = pmatch;
+ result = 0;
+ const regoff_t mlen = p - string - 1;
+
+ apply_regops(regs, s->tcmd[dfa->nchars], mlen);
+
+ m->rm_so = 0;
+ m->rm_eo = mlen;
+ ++m;
+
+ const Rule &rule = dfa->rules[0];
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+
+ const Tag &tag = dfa->tags[t];
+ if (fictive(tag)) continue;
+ if (tag.ncap >= nmatch * 2) break;
+
+ regoff_t off;
+ if (!fixed(tag)) {
+ off = regs[dfa->finvers[t]];
+ }
+ else {
+ off = tag.base == Tag::RIGHTMOST
+ ? mlen : regs[dfa->finvers[tag.base]];
+ DASSERT (off != -1);
+ off -= static_cast<regoff_t>(tag.dist);
+ }
+
+ if (tag.ncap % 2 == 0) {
+ m->rm_so = off;
+ }
+ else {
+ m->rm_eo = off;
+ ++m;
+ }
+ }
+ }
+
+ return result;
+}
+
+} // namespace re2c
+
--- /dev/null
+#include <queue>
+
+#include "libre2c_posix/lex.h"
+#include "libre2c_posix/regex.h"
+#include "libre2c_posix/regex_impl.h"
+#include "src/options/opt.h"
+#include "src/options/warn.h"
+#include "src/debug/debug.h"
+#include "src/dfa/determinization.h"
+#include "src/nfa/nfa.h"
+
+
+namespace re2c {
+
+typedef std::vector<tag_info_t> tag_path_t;
+
+struct history_t
+{
+ struct node_t {
+ uint32_t pred;
+ uint32_t step;
+ tag_info_t info;
+ };
+
+ std::vector<node_t> nodes;
+ tag_path_t path1;
+ tag_path_t path2;
+
+ inline history_t(size_t nstates, size_t ntags);
+ inline uint32_t push(uint32_t i, uint32_t step, tag_info_t info);
+ regoff_t last(uint32_t i, size_t t) const;
+ FORBID_COPY(history_t);
+};
+
+struct conf_t
+{
+ nfa_state_t *state;
+ uint32_t origin;
+ uint32_t thist;
+
+ static inline bool fin(const conf_t &c);
+ static inline bool ran(const conf_t &c);
+};
+
+struct cmp_gtop_t
+{
+ inline bool operator() (const nfa_state_t *x, const nfa_state_t *y) const;
+};
+
+typedef std::vector<conf_t> confset_t;
+typedef confset_t::iterator confiter_t;
+typedef confset_t::const_iterator cconfiter_t;
+typedef std::priority_queue<nfa_state_t*, std::vector<nfa_state_t*>
+ , cmp_gtop_t> worklist_t;
+
+struct simctx_t
+{
+ const nfa_t *nfa;
+ confset_t reach;
+ confset_t state;
+ int32_t *prec;
+ int32_t *prec_next;
+ history_t hist;
+ uint32_t hidx;
+ uint32_t step;
+ size_t rule;
+ const char *cursor;
+ const char *marker;
+
+ inline simctx_t(const regex_t *preg, const char *string);
+ FORBID_COPY(simctx_t);
+};
+
+static void reach_on_symbol(simctx_t &, uint32_t);
+static void closure(simctx_t &);
+static void relax(simctx_t &, const conf_t &, worklist_t &);
+static int32_t precedence(simctx_t &, const conf_t &, const conf_t &, int32_t &, int32_t &);
+static inline void reconstruct_history(const history_t &, tag_path_t &, uint32_t, uint32_t);
+static uint32_t index(const nfa_t *, const nfa_state_t *);
+
+int regexec_nfa(const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int)
+{
+ simctx_t ctx(preg, string);
+ const nfa_t *nfa = ctx.nfa;
+ int result = REG_NOMATCH;
+
+ const conf_t c0 = {nfa->root, index(nfa, nfa->root), HROOT};
+ ctx.reach.push_back(c0);
+ closure(ctx);
+
+ for (;;) {
+ const uint32_t sym = static_cast<uint8_t>(*ctx.cursor++);
+ if (ctx.state.empty() || sym == 0) break;
+ reach_on_symbol(ctx, sym);
+ ++ctx.step;
+ closure(ctx);
+ }
+
+ ctx.cursor = ctx.marker;
+
+ if (ctx.rule != Rule::NONE) {
+ regmatch_t *m = pmatch;
+ result = 0;
+
+ m->rm_so = 0;
+ m->rm_eo = ctx.cursor - string - 1;
+ ++m;
+
+ const Rule &rule = nfa->rules[0];
+ for (size_t t = rule.ltag; t < rule.htag; ++t) {
+ const Tag &tag = nfa->tags[t];
+
+ if (fictive(tag)) continue;
+ if (tag.ncap >= nmatch * 2) break;
+
+ const regoff_t off = ctx.hist.last(ctx.hidx, t);
+ if (tag.ncap % 2 == 0) {
+ m->rm_so = off;
+ }
+ else {
+ m->rm_eo = off;
+ ++m;
+ }
+ }
+ }
+
+ return result;
+}
+
+void reach_on_symbol(simctx_t &ctx, uint32_t sym)
+{
+ const nfa_t *nfa = ctx.nfa;
+ const confset_t &state = ctx.state;
+ confset_t &reach = ctx.reach;
+ cconfiter_t b = state.begin(), e = state.end(), i;
+
+ reach.clear();
+ for (i = b; i != e; ++i) {
+ nfa_state_t *s = i->state;
+ if (s->type == nfa_state_t::RAN) {
+ for (const Range *r = s->ran.ran; r; r = r->next()) {
+ if (r->lower() <= sym && sym < r->upper()) {
+ conf_t c = {s->ran.out, index(nfa, s), i->thist};
+ reach.push_back(c);
+ }
+ }
+ }
+ }
+}
+
+void closure(simctx_t &ctx)
+{
+ const nfa_t *nfa = ctx.nfa;
+ const confset_t &reach = ctx.reach;
+ confset_t &state = ctx.state;
+
+ worklist_t wl;
+ state.clear();
+
+ for (cconfiter_t c = reach.begin(); c != reach.end(); ++c) {
+ relax(ctx, *c, wl);
+ }
+
+ for (; !wl.empty(); ) {
+ nfa_state_t *q = wl.top();
+ wl.pop();
+ q->active = 0;
+ conf_t x = state[q->clos];
+
+ switch (q->type) {
+ case nfa_state_t::NIL:
+ x.state = q->nil.out;
+ relax(ctx, x, wl);
+ break;
+ case nfa_state_t::ALT:
+ x.state = q->alt.out1;
+ relax(ctx, x, wl);
+ x.state = q->alt.out2;
+ relax(ctx, x, wl);
+ break;
+ case nfa_state_t::TAG:
+ x.state = q->tag.out;
+ x.thist = ctx.hist.push(x.thist, ctx.step, q->tag.info);
+ relax(ctx, x, wl);
+ break;
+ case nfa_state_t::FIN:
+ ctx.marker = ctx.cursor + 1;
+ ctx.hidx = x.thist;
+ ctx.rule = 0;
+ break;
+ case nfa_state_t::RAN:
+ break;
+ }
+ }
+
+ confiter_t b = state.begin(), e = state.end(), i, j;
+
+ for (i = b; i != e; ++i) {
+ i->state->clos = NOCLOS;
+ DASSERT(i->state->active == 0);
+ }
+
+ // drop "inner" states (non-final without outgoing non-epsilon transitions)
+ j = std::partition(b, e, conf_t::ran);
+ e = std::partition(j, e, conf_t::fin);
+ size_t n = static_cast<size_t>(e - b);
+
+ // we must have exactly one final state
+ if (j != e) {
+ DASSERT(j + 1 == e);
+ n = static_cast<size_t>(j - b) + 1;
+ }
+
+ state.resize(n);
+
+ int32_t *prec = ctx.prec_next;
+ const size_t nstates = nfa->size;
+ for (i = b; i != e; ++i) {
+ uint32_t ix = index(nfa, i->state);
+ for (j = i + 1; j != e; ++j) {
+ uint32_t jx = index(nfa, j->state);
+ int32_t rho1, rho2, l;
+ l = precedence(ctx, *i, *j, rho1, rho2);
+ prec[ix * nstates + jx] = pack(rho1, l);
+ prec[jx * nstates + ix] = pack(rho2, -l);
+ }
+ prec[ix * nstates + ix] = 0;
+ }
+ std::swap(ctx.prec, ctx.prec_next);
+}
+
+void relax(simctx_t &ctx, const conf_t &c, worklist_t &wl)
+{
+ confset_t &state = ctx.state;
+ nfa_state_t *q = c.state;
+ const uint32_t idx = q->clos;
+ int32_t h1, h2;
+
+ // first time we see this state
+ if (idx == NOCLOS) {
+ q->clos = static_cast<uint32_t>(state.size());
+ state.push_back(c);
+ }
+
+ // States of in-degree less than 2 are not joint points;
+ // the fact that we are re-scanning this state means that we found
+ // a better path to some previous state. Due to the right distributivity
+ // of path comparison over path concatenation (X < Y => XZ < YZ) we
+ // can just propagate the new path up to the next join point.
+ else if (q->indeg < 2) {
+ state[idx] = c;
+ }
+
+ // join point; compare the new path and the old path
+ else if (precedence(ctx, c, state[idx], h1, h2) < 0) {
+ state[idx] = c;
+ }
+
+ // the previous path was better, discard the new one
+ else {
+ q = NULL;
+ }
+
+ if (q != NULL && !q->active) {
+ q->active = 1;
+ wl.push(q);
+ }
+}
+
+int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y
+ , int32_t &rhox, int32_t &rhoy)
+{
+ const size_t nstates = ctx.nfa->size;
+ const std::vector<Tag> &tags = ctx.nfa->tags;
+ const uint32_t xl = x.thist, yl = y.thist;
+ const uint32_t xo = x.origin, yo = y.origin;
+
+ if (xl == yl && xo == yo) {
+ rhox = rhoy = MAX_RHO;
+ return 0;
+ }
+
+ history_t &hist = ctx.hist;
+ tag_path_t &p1 = hist.path1, &p2 = hist.path2;
+ reconstruct_history(hist, p1, xl, ctx.step);
+ reconstruct_history(hist, p2, yl, ctx.step);
+ tag_path_t::const_reverse_iterator
+ i1 = p1.rbegin(), e1 = p1.rend(), j1 = i1, g1,
+ i2 = p2.rbegin(), e2 = p2.rend(), j2 = i2, g2;
+
+ const bool fork_frame = xo == yo;
+
+ // longest precedence
+ if (fork_frame) {
+ // find fork
+ for (; j1 != e1 && j2 != e2 && *j1 == *j2; ++j1, ++j2);
+ rhox = rhoy = j1 > i1
+ ? tags[(j1 - 1)->idx].height : MAX_RHO;
+ }
+ else {
+ // get precedence table and size of the origin state
+ rhox = unpack_longest(ctx.prec[xo * nstates + yo]);
+ rhoy = unpack_longest(ctx.prec[yo * nstates + xo]);
+ }
+ for (g1 = j1; g1 != e1; ++g1) {
+ rhox = std::min(rhox, tags[g1->idx].height);
+ }
+ for (g2 = j2; g2 != e2; ++g2) {
+ rhoy = std::min(rhoy, tags[g2->idx].height);
+ }
+ if (rhox > rhoy) return -1;
+ if (rhox < rhoy) return 1;
+
+ // leftmost precedence
+ if (fork_frame) {
+ // equal => not less
+ if (j1 == e1 && j2 == e2) return 0;
+
+ // shorter => less
+ if (j1 == e1) return -1;
+ if (j2 == e2) return 1;
+
+ const uint32_t idx1 = j1->idx, idx2 = j2->idx;
+ const bool neg1 = j1->neg, neg2 = j2->neg;
+
+ // can't be both closing
+ DASSERT(!(idx1 % 2 == 1 && idx2 % 2 == 1));
+
+ // closing vs opening: closing wins
+ if (idx1 % 2 == 1) return -1;
+ if (idx2 % 2 == 1) return 1;
+
+ // can't be both negative
+ DASSERT(!(neg1 && neg2));
+
+ // positive vs negative: positive wins
+ if (neg1) return 1;
+ if (neg2) return -1;
+
+ // positive vs positive: smaller wins
+ // (this case is only possible because multiple
+ // top-level RE don't have proper negative tags)
+ if (idx1 < idx2) return -1;
+ if (idx1 > idx2) return 1;
+ }
+ else {
+ return unpack_leftmost(ctx.prec[xo * nstates + yo]);
+ }
+
+ // unreachable
+ DASSERT(false);
+ return 0;
+}
+
+simctx_t::simctx_t(const regex_t *preg, const char *string)
+ : nfa(preg->nfa)
+ , reach()
+ , state()
+ , prec(preg->prec_buf1)
+ , prec_next(preg->prec_buf2)
+ , hist(nfa->size, nfa->tags.size())
+ , hidx(HROOT)
+ , step(0)
+ , rule(Rule::NONE)
+ , cursor(string)
+ , marker(string)
+{
+ state.reserve(nfa->size);
+ reach.reserve(nfa->size);
+}
+
+history_t::history_t(size_t nstates, size_t ntags)
+ : nodes()
+ , path1()
+ , path2()
+{
+ nodes.reserve(ntags * nstates);
+ path1.reserve(ntags);
+ path2.reserve(ntags);
+}
+
+uint32_t history_t::push(uint32_t idx, uint32_t step, tag_info_t info)
+{
+ const node_t x = {idx, step, info};
+ nodes.push_back(x);
+ return static_cast<uint32_t>(nodes.size() - 1);
+}
+
+regoff_t history_t::last(uint32_t i, size_t t) const
+{
+ regoff_t off = -1;
+ for (; i != HROOT; ) {
+ const node_t &n = nodes[i];
+ if (n.info.idx == t) {
+ if (!n.info.neg) {
+ off = static_cast<regoff_t>(n.step);
+ }
+ break;
+ }
+ i = n.pred;
+ }
+ return off;
+}
+
+void reconstruct_history(const history_t &hist,
+ tag_path_t &path, uint32_t idx, uint32_t step)
+{
+ path.clear();
+ for (; idx != HROOT; ) {
+ const history_t::node_t &n = hist.nodes[idx];
+ if (n.step != step) break;
+ path.push_back(n.info);
+ idx = n.pred;
+ }
+}
+
+uint32_t index(const nfa_t *nfa, const nfa_state_t *state)
+{
+ return static_cast<uint32_t>(state - nfa->states);
+}
+
+bool conf_t::fin(const conf_t &c)
+{
+ return c.state->type == nfa_state_t::FIN;
+}
+
+bool conf_t::ran(const conf_t &c)
+{
+ return c.state->type == nfa_state_t::RAN;
+}
+
+bool cmp_gtop_t::operator() (const nfa_state_t *x, const nfa_state_t *y) const
+{
+ return x->topord < y->topord;
+}
+
+} // namespace re2c
+
void regfree(regex_t *preg)
{
delete preg->rmgr;
+ delete &preg->nfa->charset;
+ delete &preg->nfa->rules;
+ delete &preg->nfa->tags;
delete preg->nfa;
delete[] preg->pmatch;
- delete[] preg->regs;
- delete[] preg->char2class;
- const dfa_t *dfa = preg->dfa;
- delete &dfa->charset;
- delete &dfa->rules;
- delete &dfa->tags;
- delete &dfa->mtagvers;
- delete[] dfa->finvers;
- delete &dfa->tcpool;
- delete dfa;
+ if (preg->flags & REG_NFA) {
+ delete[] preg->prec_buf1;
+ delete[] preg->prec_buf2;
+ }
+ else {
+ delete[] preg->regs;
+ delete[] preg->char2class;
+ delete &preg->dfa->mtagvers;
+ delete[] preg->dfa->finvers;
+ delete &preg->dfa->tcpool;
+ delete preg->dfa;
+ }
}
static int test(const char *pattern, const char *string
- , size_t nmatch, const regoff_t *submatch)
+ , size_t nmatch, const regoff_t *submatch, int flags)
{
regex_t re;
regmatch_t *pmatch = new regmatch_t[nmatch];
+ const char *prefix = flags & REG_NFA ? "NFA" : "DFA";
int result;
- result = regcomp(&re, pattern, 0);
+ result = regcomp(&re, pattern, flags);
if (result != 0) {
- fprintf(stderr, "regcomp() failed for RE %s\n", pattern);
+ fprintf(stderr, "%s: regcomp() failed for RE %s\n", prefix, pattern);
goto end;
}
- result = regexec(&re, string, nmatch, pmatch, 0);
+ result = regexec(&re, string, nmatch, pmatch, flags);
if (result != 0) {
if (nmatch == 0) {
// failure was expected => it's a success
result = 0;
}
else if (nmatch > 0) {
- fprintf(stderr, "regexec() failed for RE %s and string %s\n"
- , pattern, string);
+ fprintf(stderr, "%s: regexec() failed for RE %s and string %s\n"
+ , prefix, pattern, string);
goto end;
}
}
else if (nmatch == 0) {
// regexed must have failed, something is wrong
result = REG_NOMATCH;
- fprintf(stderr, "regexec() didn't fail while it should"
- " for RE %s and string %s\n", pattern, string);
+ fprintf(stderr, "%s: regexec() didn't fail while it should"
+ " for RE %s and string %s\n", prefix, pattern, string);
goto end;
}
if (so != have.rm_so || eo != have.rm_eo) {
result = 1;
- fprintf(stderr, "incorrect submatch for RE %s and string %s:\n"
+ fprintf(stderr, "%s: incorrect submatch for RE %s and string %s:\n"
"\tpmatch[%u].rm_so = %ld (expected %ld)\n"
"\tpmatch[%u].rm_eo = %ld (expected %ld)\n"
- , pattern, string
+ , prefix, pattern, string
, i, have.rm_so, so
, i, have.rm_eo, eo);
goto end;
// 'long' as vararg requres suffix 'L', which is easy to forget and hard
// to notice (the problem is platform/toolchain-specific).
#define GS static const regoff_t gs[]
-#define T(R,S,gs) e |= test(R,S,sizeof(gs)/sizeof(gs[0])/2,gs);
-#define T0(R,S) e |= test(R,S,0,NULL);
+#define T(R,S,gs) e |= test(R,S,sizeof(gs)/sizeof(gs[0])/2,gs,flags);
+#define T0(R,S) e |= test(R,S,0,NULL,flags);
#define T1(R,S,a,b) { GS = {a,b}; T(R,S,gs); }
#define T2(R,S,a,b,c,d) { GS = {a,b,c,d}; T(R,S,gs); }
#define T3(R,S,a,b,c,d,e,f) { GS = {a,b,c,d,e,f}; T(R,S,gs); }
#define T9(R,S,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r) { GS = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r}; T(R,S,gs); }
#define T10(R,S,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t) { GS = {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t}; T(R,S,gs); }
-int main()
+int test_all(int flags)
{
int e = 0;
#undef T5
#undef T6
#undef T7
+
+int main()
+{
+ int e = 0;
+
+ e |= test_all(0);
+ e |= test_all(REG_NFA);
+
+ return e;
+}
+
FORBID_COPY(determ_context_t);
};
+// maximum 29-bit (we have 30 bits, but highest must be non-negative)
+static const int32_t MAX_RHO = 0x1fffFFFF;
void tagged_epsilon_closure(determ_context_t &ctx);
void closure_posix(determ_context_t &);
static void reconstruct_history(const tag_history_t &, tag_path_t &, hidx_t);
-// maximum 29-bit (we have 30 bits, but highest must be non-negative)
-static const int32_t MAX_RHO = 0x1fffFFFF;
-
int32_t precedence(determ_context_t &ctx,
const clos_t &x, const clos_t &y, int32_t &rhox, int32_t &rhoy)
{