From: Ulya Trofimovich Date: Mon, 18 Feb 2019 22:48:00 +0000 (+0000) Subject: libre2c: hid some of the implementation details behind a pointer in regex_t struct. X-Git-Tag: 1.2~159 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=662c7e874512b1e41990e98c36a8c7d36bc48a2f;p=re2c libre2c: hid some of the implementation details behind a pointer in regex_t struct. --- diff --git a/re2c/lib/regcomp.cc b/re2c/lib/regcomp.cc index 6da95a6c..ce0c8284 100644 --- a/re2c/lib/regcomp.cc +++ b/re2c/lib/regcomp.cc @@ -1,5 +1,6 @@ #include "lib/lex.h" #include "lib/regex.h" +#include "lib/regex_impl.h" #include "src/options/opt.h" #include "src/nfa/nfa.h" #include "src/dfa/dfa.h" @@ -46,19 +47,7 @@ int regcomp(regex_t *preg, const char *pattern, int cflags) dfa_t *dfa = NULL; if (cflags & REG_NFA) { - const size_t ntags = 2 * (preg->re_nsub - 1); - preg->done = new bool[ntags]; - if (!(cflags & REG_TRIE)) { - const size_t sz = ntags * nfa->ncores; - preg->offsets1 = new regoff_t[sz]; - preg->offsets2 = new regoff_t[sz]; - preg->offsets3 = new regoff_t[ntags]; - } - if (!(cflags & REG_LEFTMOST) && !(cflags & REG_TRIE)) { - const size_t sz = nfa->ncores * nfa->ncores; - preg->prectbl1 = new int32_t[sz]; - preg->prectbl2 = new int32_t[sz]; - } + preg->simctx = new libre2c::simctx_t(nfa, preg->re_nsub, cflags); } else { preg->char2class = new size_t[256]; diff --git a/re2c/lib/regex.h b/re2c/lib/regex.h index 76b638c6..1b9c1f01 100644 --- a/re2c/lib/regex.h +++ b/re2c/lib/regex.h @@ -7,11 +7,15 @@ // fwd namespace re2c { - struct nfa_t; struct dfa_t; struct RangeMgr; +} // namespace re2c +namespace re2c { +namespace libre2c { +struct simctx_t; +} // namespace libre2c } // namespace re2c typedef ptrdiff_t regoff_t; @@ -44,13 +48,8 @@ struct regex_t regmatch_t *pmatch; regoff_t *regs; size_t *char2class; - int *prectbl1; - int *prectbl2; - regoff_t *offsets1; - regoff_t *offsets2; - regoff_t *offsets3; - bool *done; int flags; + re2c::libre2c::simctx_t *simctx; }; static const int REG_NOMATCH = INT_MAX; diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index 4cfd0a0d..d087efa3 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -76,12 +76,19 @@ typedef std::priority_queue struct simctx_t { const nfa_t *nfa; + const size_t nsub; + const int flags; + confset_t reach; confset_t state; + history_t hist; int32_t hidx; + uint32_t step; + size_t rule; + const char *cursor; const char *marker; @@ -95,16 +102,14 @@ struct simctx_t int32_t *prectbl2; cache_t cache; - const bool use_gtop; std::vector gor1_topsort; std::vector gor1_linear; std::vector gtop_heap_storage; cmp_gtop_t gtop_cmp; gtop_heap_t gtop_heap; - const size_t nsub; - - simctx_t(const regex_t *preg, const char *string); + simctx_t(const nfa_t *nfa, size_t re_nsub, int flags); + ~simctx_t(); FORBID_COPY(simctx_t); }; diff --git a/re2c/lib/regexec.cc b/re2c/lib/regexec.cc index edfc6db4..98e1a538 100644 --- a/re2c/lib/regexec.cc +++ b/re2c/lib/regexec.cc @@ -69,40 +69,70 @@ int finalize(const simctx_t &ctx, const char *string, size_t nmatch, return 0; } -simctx_t::simctx_t(const regex_t *preg, const char *string) - : nfa(preg->nfa) +simctx_t::simctx_t(const nfa_t *nfa, size_t re_nsub, int flags) + : nfa(nfa) + , nsub(2 * (re_nsub - 1)) + , flags(flags) , reach() , state() , hist(nfa->size, nfa->tags.size()) , hidx(history_t::ROOT) , step(0) , rule(Rule::NONE) - , cursor(string) - , marker(string) - , offsets1(preg->offsets1) - , offsets2(preg->offsets2) - , offsets3(preg->offsets3) - , done(preg->done) - , prectbl1(preg->prectbl1) - , prectbl2(preg->prectbl2) + , cursor(NULL) + , marker(NULL) + , offsets1(NULL) + , offsets2(NULL) + , offsets3(NULL) + , done(NULL) + , prectbl1(NULL) + , prectbl2(NULL) , cache() - , use_gtop(preg->flags & REG_GTOP) , gor1_topsort() , gor1_linear() , gtop_heap_storage() , gtop_cmp() , gtop_heap(gtop_cmp, gtop_heap_storage) - , nsub(2 * (preg->re_nsub - 1)) { - state.reserve(nfa->size); - reach.reserve(nfa->size); + const size_t + nstates = nfa->size, + ncores = nfa->ncores; - if (use_gtop) { - gtop_heap_storage.reserve(nfa->size); + state.reserve(nstates); + reach.reserve(nstates); + + done = new bool[nsub]; + + if (!(flags & REG_TRIE)) { + offsets1 = new regoff_t[nsub * ncores]; + offsets2 = new regoff_t[nsub * ncores]; + offsets3 = new regoff_t[nsub]; + } + if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) { + prectbl1 = new int32_t[ncores * ncores]; + prectbl2 = new int32_t[ncores * ncores]; + } + + if (flags & REG_GTOP) { + gtop_heap_storage.reserve(nstates); } else { - gor1_topsort.reserve(nfa->size); - gor1_linear.reserve(nfa->size); + gor1_topsort.reserve(nstates); + gor1_linear.reserve(nstates); + } +} + +simctx_t::~simctx_t() +{ + delete[] done; + if (!(flags & REG_TRIE)) { + delete[] offsets1; + delete[] offsets2; + delete[] offsets3; + } + if (!(flags & REG_LEFTMOST) && !(flags & REG_TRIE)) { + delete[] prectbl1; + delete[] prectbl2; } } diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index 70432b50..54458e72 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -17,7 +17,8 @@ static void update_offsets(simctx_t &ctx, const conf_t &c); int regexec_nfa_leftmost(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) { - simctx_t ctx(preg, string); + simctx_t &ctx = *preg->simctx; + ctx.cursor = ctx.marker = string; const conf_t c0(ctx.nfa->root, 0, history_t::ROOT); ctx.reach.push_back(c0); diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index 6b03498e..de685c65 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -37,7 +37,8 @@ static inline void relax_gtop(simctx_t &, const conf_t &); int regexec_nfa_posix(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) { - simctx_t ctx(preg, string); + simctx_t &ctx = *preg->simctx; + ctx.cursor = ctx.marker = string; const nfa_t *nfa = ctx.nfa; const conf_t c0(nfa->root, 0, history_t::ROOT); @@ -118,7 +119,7 @@ void reach_on_symbol(simctx_t &ctx, uint32_t sym) void closure_posix(simctx_t &ctx) { - if (ctx.use_gtop) { + if (ctx.flags & REG_GTOP) { closure_posix_gtop(ctx); } else { diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 68422656..3f058f3a 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -52,7 +52,8 @@ static inline uint32_t get_orig(const history_t &hist, int32_t idx); int regexec_nfa_posix_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) { - simctx_t ctx(preg, string); + simctx_t &ctx = *preg->simctx; + ctx.cursor = ctx.marker = string; const nfa_t *nfa = ctx.nfa; confset_t &state = ctx.state; diff --git a/re2c/lib/regfree.cc b/re2c/lib/regfree.cc index a900a64b..3dfafa22 100644 --- a/re2c/lib/regfree.cc +++ b/re2c/lib/regfree.cc @@ -1,4 +1,5 @@ #include "lib/regex.h" +#include "lib/regex_impl.h" #include "src/nfa/nfa.h" #include "src/dfa/dfa.h" @@ -14,18 +15,8 @@ void regfree(regex_t *preg) delete preg->nfa; delete[] preg->pmatch; - const int f = preg->flags; - if (f & REG_NFA) { - delete[] preg->done; - if (!(f & REG_TRIE)) { - delete[] preg->offsets1; - delete[] preg->offsets2; - delete[] preg->offsets3; - } - if (!(f & REG_LEFTMOST) && !(f & REG_TRIE)) { - delete[] preg->prectbl1; - delete[] preg->prectbl2; - } + if (preg->flags & REG_NFA) { + delete preg->simctx; } else { delete[] preg->regs;