From e9799d313a32aab1b8199d2e21f3a28a99871078 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 18 Feb 2019 16:26:44 +0000 Subject: [PATCH] libre2c: use signed indices in tag history (this way root index -1 is the least one). --- re2c/lib/regex_impl.h | 15 +++++++------ re2c/lib/regexec.cc | 8 ++++--- re2c/lib/regexec_nfa_leftmost.cc | 8 +++---- re2c/lib/regexec_nfa_posix.cc | 21 +++++++++--------- re2c/lib/regexec_nfa_posix_trie.cc | 34 +++++++++++++++--------------- 5 files changed, 45 insertions(+), 41 deletions(-) diff --git a/re2c/lib/regex_impl.h b/re2c/lib/regex_impl.h index d5f76dbb..2e4f4570 100644 --- a/re2c/lib/regex_impl.h +++ b/re2c/lib/regex_impl.h @@ -18,18 +18,21 @@ typedef std::vector tag_path_t; struct history_t { struct node_t { - uint32_t pred; + int32_t pred; uint32_t step; tag_info_t info; uint32_t orig; }; + static const int32_t ROOT; + std::vector nodes; tag_path_t path1; tag_path_t path2; history_t(size_t nstates, size_t ntags); - inline uint32_t push(uint32_t i, uint32_t step, tag_info_t info, uint32_t orig); + inline const node_t &at(int32_t i) const { return nodes[static_cast(i)]; } + inline int32_t push(int32_t i, uint32_t step, tag_info_t info, uint32_t orig); FORBID_COPY(history_t); }; @@ -37,7 +40,7 @@ struct conf_t { nfa_state_t *state; uint32_t origin; - uint32_t thist; + int32_t thist; }; struct ran_or_fin_t @@ -72,7 +75,7 @@ struct simctx_t confset_t reach; confset_t state; history_t hist; - uint32_t hidx; + int32_t hidx; uint32_t step; size_t rule; const char *cursor; @@ -102,11 +105,11 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string, size_t nmatc int regexec_nfa_leftmost(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); int regexec_nfa_leftmost_trie(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags); -uint32_t history_t::push(uint32_t idx, uint32_t step, tag_info_t info, uint32_t orig) +int32_t history_t::push(int32_t idx, uint32_t step, tag_info_t info, uint32_t orig) { const node_t x = {idx, step, info, orig}; nodes.push_back(x); - return static_cast(nodes.size() - 1); + return static_cast(nodes.size() - 1); } uint32_t index(const nfa_t *nfa, const nfa_state_t *state) diff --git a/re2c/lib/regexec.cc b/re2c/lib/regexec.cc index 8ddfe2ff..f642bb15 100644 --- a/re2c/lib/regexec.cc +++ b/re2c/lib/regexec.cc @@ -29,6 +29,8 @@ int regexec(const regex_t *preg, const char *string, size_t nmatch, namespace re2c { namespace libre2c { +const int32_t history_t::ROOT = -1; + int finalize(const simctx_t &ctx, const char *string, size_t nmatch, regmatch_t pmatch[]) { @@ -45,8 +47,8 @@ int finalize(const simctx_t &ctx, const char *string, size_t nmatch, bool *done = ctx.done; memset(done, 0, ctx.nsub * sizeof(bool)); - for (size_t i = ctx.hidx; todo > 0 && i != HROOT; ) { - const history_t::node_t &n = ctx.hist.nodes[i]; + for (int32_t i = ctx.hidx; todo > 0 && i != history_t::ROOT; ) { + const history_t::node_t &n = ctx.hist.at(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; if (!fictive(tag) && t < nmatch * 2 && !done[t]) { @@ -72,7 +74,7 @@ simctx_t::simctx_t(const regex_t *preg, const char *string) , reach() , state() , hist(nfa->size, nfa->tags.size()) - , hidx(HROOT) + , hidx(history_t::ROOT) , step(0) , rule(Rule::NONE) , cursor(string) diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index d6ab85cc..7f2bff69 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -19,7 +19,7 @@ int regexec_nfa_leftmost(const regex_t *preg, const char *string { simctx_t ctx(preg, string); - const conf_t c0 = {ctx.nfa->root, 0, HROOT}; + const conf_t c0 = {ctx.nfa->root, 0, history_t::ROOT}; ctx.reach.push_back(c0); closure_leftmost(ctx); @@ -75,7 +75,7 @@ void reach_on_symbol(simctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - conf_t c = {s->ran.out, s->coreid, HROOT}; + conf_t c = {s->ran.out, s->coreid, history_t::ROOT}; reach.push_back(c); update_offsets(ctx, *i); break; @@ -147,8 +147,8 @@ void update_offsets(simctx_t &ctx, const conf_t &c) memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); memset(done, 0, nsub * sizeof(bool)); - for (uint32_t i = c.thist; i != HROOT; ) { - const history_t::node_t &n = ctx.hist.nodes[i]; + for (int32_t i = c.thist; i != history_t::ROOT; ) { + const history_t::node_t &n = ctx.hist.at(i); const size_t t = n.info.idx; if (!done[t]) { done[t] = true; diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index 89fd9f45..97db4f12 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -18,7 +18,7 @@ static void closure_posix(simctx_t &); static void update_offsets(simctx_t &ctx, const conf_t &c); static void update_offsets_and_prectbl(simctx_t &); static int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y, int32_t &prec1, int32_t &prec2); -static void unwind(history_t &hist, tag_path_t &path, uint32_t idx); +static void unwind(history_t &hist, tag_path_t &path, int32_t idx); // we *do* want this to be inlined static inline void relax(simctx_t &, const conf_t &, worklist_t &); @@ -29,7 +29,7 @@ int regexec_nfa_posix(const regex_t *preg, const char *string simctx_t ctx(preg, string); const nfa_t *nfa = ctx.nfa; - const conf_t c0 = {nfa->root, 0, HROOT}; + const conf_t c0 = {nfa->root, 0, history_t::ROOT}; ctx.reach.push_back(c0); closure_posix(ctx); @@ -90,7 +90,7 @@ void reach_on_symbol(simctx_t &ctx, uint32_t sym) if (s->type == nfa_state_t::RAN) { for (const Range *r = s->ran.ran; r; r = r->next()) { if (r->lower() <= sym && sym < r->upper()) { - conf_t c = {s->ran.out, s->coreid, HROOT}; + conf_t c = {s->ran.out, s->coreid, history_t::ROOT}; reach.push_back(c); state[j++] = *i; break; @@ -202,8 +202,8 @@ void update_offsets(simctx_t &ctx, const conf_t &c) memcpy(o, ctx.offsets2 + c.origin * nsub, nsub * sizeof(regoff_t)); memset(done, 0, nsub * sizeof(bool)); - for (uint32_t i = c.thist; i != HROOT; ) { - const history_t::node_t &n = ctx.hist.nodes[i]; + for (int32_t i = c.thist; i != history_t::ROOT; ) { + const history_t::node_t &n = ctx.hist.at(i); const Tag &tag = tags[n.info.idx]; const size_t t = tag.ncap; if (!fictive(tag) && !done[t]) { @@ -251,9 +251,8 @@ void update_offsets_and_prectbl(simctx_t &ctx) int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y , int32_t &prec1, int32_t &prec2) { - const uint32_t - idx1 = x.thist, orig1 = x.origin, - idx2 = y.thist, orig2 = y.origin; + const int32_t idx1 = x.thist, idx2 = y.thist; + const uint32_t orig1 = x.origin, orig2 = y.origin; if (idx1 == idx2 && orig1 == orig2) { prec1 = prec2 = MAX_RHO; @@ -326,11 +325,11 @@ int32_t precedence(simctx_t &ctx, const conf_t &x, const conf_t &y } } -void unwind(history_t &hist, tag_path_t &path, uint32_t idx) +void unwind(history_t &hist, tag_path_t &path, int32_t idx) { path.clear(); - for (uint32_t i = idx; i != HROOT; ) { - const history_t::node_t &n = hist.nodes[i]; + for (int32_t i = idx; i != history_t::ROOT; ) { + const history_t::node_t &n = hist.at(i); path.push_back(n.info); i = n.pred; } diff --git a/re2c/lib/regexec_nfa_posix_trie.cc b/re2c/lib/regexec_nfa_posix_trie.cc index 615bc1af..38c30bb1 100644 --- a/re2c/lib/regexec_nfa_posix_trie.cc +++ b/re2c/lib/regexec_nfa_posix_trie.cc @@ -43,11 +43,11 @@ namespace libre2c { static void reach_on_symbol(simctx_t &, uint32_t); static void closure_posix(simctx_t &); static void relax(simctx_t &, const conf_t &, worklist_t &); -static int32_t precedence(simctx_t &ctx, uint32_t xl, uint32_t yl, int32_t &rhox, int32_t &rhoy); -static int32_t precedence_(simctx_t &ctx, uint32_t xl, uint32_t yl, int32_t &rhox, int32_t &rhoy); -static uint32_t unwind(history_t &hist, tag_path_t &path, uint32_t hidx, uint32_t step); -static inline uint32_t get_step(const history_t &hist, uint32_t idx); -static inline uint32_t get_orig(const history_t &hist, uint32_t idx); +static int32_t precedence(simctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy); +static int32_t precedence_(simctx_t &ctx, int32_t xl, int32_t yl, int32_t &rhox, int32_t &rhoy); +static int32_t unwind(history_t &hist, tag_path_t &path, int32_t hidx, uint32_t step); +static inline uint32_t get_step(const history_t &hist, int32_t idx); +static inline uint32_t get_orig(const history_t &hist, int32_t idx); int regexec_nfa_posix_trie(const regex_t *preg, const char *string , size_t nmatch, regmatch_t pmatch[], int) @@ -56,7 +56,7 @@ int regexec_nfa_posix_trie(const regex_t *preg, const char *string const nfa_t *nfa = ctx.nfa; confset_t &state = ctx.state; - const conf_t c0 = {nfa->root, index(nfa, nfa->root), HROOT}; + const conf_t c0 = {nfa->root, index(nfa, nfa->root), history_t::ROOT}; ctx.reach.push_back(c0); closure_posix(ctx); @@ -186,13 +186,13 @@ void relax(simctx_t &ctx, const conf_t &c, worklist_t &wl) } } -int32_t precedence(simctx_t &ctx, uint32_t idx1, uint32_t idx2 +int32_t precedence(simctx_t &ctx, int32_t idx1, int32_t idx2 , int32_t &prec1, int32_t &prec2) { int32_t prec = 0; // use the same cache entry for (x, y) and (y, x) - uint32_t k1 = idx1, k2 = idx2; + uint32_t k1 = static_cast(idx1), k2 = static_cast(idx2); bool invert = k2 < k1; if (invert) std::swap(k1, k2); const uint64_t key = (static_cast(k1) << 32) | k2; @@ -223,7 +223,7 @@ int32_t precedence(simctx_t &ctx, uint32_t idx1, uint32_t idx2 return prec; } -int32_t precedence_(simctx_t &ctx, uint32_t idx1, uint32_t idx2 +int32_t precedence_(simctx_t &ctx, int32_t idx1, int32_t idx2 , int32_t &prec1, int32_t &prec2) { if (idx1 == idx2) { @@ -316,21 +316,21 @@ end: return prec; } -uint32_t get_step(const history_t &hist, uint32_t idx) +uint32_t get_step(const history_t &hist, int32_t idx) { - return idx == HROOT ? 0 : hist.nodes[idx].step; + return idx == history_t::ROOT ? 0 : hist.at(idx).step; } -uint32_t get_orig(const history_t &hist, uint32_t idx) +uint32_t get_orig(const history_t &hist, int32_t idx) { - return idx == HROOT ? 0 : hist.nodes[idx].orig; + return idx == history_t::ROOT ? 0 : hist.at(idx).orig; } -uint32_t unwind(history_t &hist, tag_path_t &path, uint32_t idx, uint32_t step) +int32_t unwind(history_t &hist, tag_path_t &path, int32_t idx, uint32_t step) { - uint32_t new_idx = HROOT; - for (uint32_t i = idx; i != HROOT; ) { - const history_t::node_t &n = hist.nodes[i]; + int32_t new_idx = history_t::ROOT; + for (int32_t i = idx; i != history_t::ROOT; ) { + const history_t::node_t &n = hist.at(i); if (n.step < step) { new_idx = i; break; -- 2.40.0