From c960cc011fd865a72930c7872bf5f978d548a3f9 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 5 Feb 2019 14:57:42 +0000 Subject: [PATCH] libre2c: performance optimization: get all tag values in one scan of history. --- re2c/lib/regexec_nfa_leftmost.cc | 17 ---------- re2c/lib/regexec_nfa_posix.cc | 58 ++++++++++++++------------------ 2 files changed, 25 insertions(+), 50 deletions(-) diff --git a/re2c/lib/regexec_nfa_leftmost.cc b/re2c/lib/regexec_nfa_leftmost.cc index ed3c5f63..086817f1 100644 --- a/re2c/lib/regexec_nfa_leftmost.cc +++ b/re2c/lib/regexec_nfa_leftmost.cc @@ -27,7 +27,6 @@ struct history_t inline history_t(size_t nstates, size_t ntags); inline uint32_t push(uint32_t i, uint32_t step, tag_info_t info); - inline regoff_t last(uint32_t i, size_t t) const; FORBID_COPY(history_t); }; @@ -225,21 +224,5 @@ uint32_t history_t::push(uint32_t idx, uint32_t step, tag_info_t info) return static_cast(nodes.size() - 1); } -regoff_t history_t::last(uint32_t i, size_t t) const -{ - regoff_t off = -1; - for (; i != HROOT; ) { - const node_t &n = nodes[i]; - if (n.tag == t) { - if (!n.neg) { - off = static_cast(n.step); - } - break; - } - i = n.pred; - } - return off; -} - } // namespace re2c diff --git a/re2c/lib/regexec_nfa_posix.cc b/re2c/lib/regexec_nfa_posix.cc index e1c1c02a..fc695f1b 100644 --- a/re2c/lib/regexec_nfa_posix.cc +++ b/re2c/lib/regexec_nfa_posix.cc @@ -30,7 +30,6 @@ struct history_t inline history_t(size_t nstates, size_t ntags); inline uint32_t push(uint32_t i, uint32_t step, tag_info_t info); - inline regoff_t last(uint32_t i, size_t t) const; inline void reconstruct(tag_path_t &, uint32_t, uint32_t); FORBID_COPY(history_t); }; @@ -107,29 +106,38 @@ int regexec_nfa_posix(const regex_t *preg, const char *string, size_t nmatch, ctx.cursor = ctx.marker; if (ctx.rule != Rule::NONE) { - regmatch_t *m = pmatch; result = 0; + regmatch_t *m = pmatch; m->rm_so = 0; m->rm_eo = ctx.cursor - string - 1; - ++m; - - const Rule &rule = nfa->rules[0]; - for (size_t t = rule.ltag; t < rule.htag; ++t) { - const Tag &tag = nfa->tags[t]; - - if (fictive(tag)) continue; - if (tag.ncap >= nmatch * 2) break; - const regoff_t off = ctx.hist.last(ctx.hidx, t); - if (tag.ncap % 2 == 0) { - m->rm_so = off; - } - else { - m->rm_eo = off; - ++m; + size_t todo = nmatch * 2, ntags = nfa->tags.size(); + bool *done = new bool[ntags]; + memset(done, 0, ntags * sizeof(bool)); + + for (size_t i = ctx.hidx; todo > 0 && i != HROOT; ) { + const history_t::node_t &n = ctx.hist.nodes[i]; + const size_t t = n.info.idx; + if (!done[t]) { + done[t] = true; + const Tag &tag = nfa->tags[t]; + if (!fictive(tag) && tag.ncap < nmatch * 2) { + --todo; + const regoff_t off = n.info.neg ? -1 : static_cast(n.step); + m = &pmatch[tag.ncap / 2 + 1]; + if (tag.ncap % 2 == 0) { + m->rm_so = off; + } + else { + m->rm_eo = off; + } + } } + i = n.pred; } + + delete[] done; } return result; @@ -386,22 +394,6 @@ uint32_t history_t::push(uint32_t idx, uint32_t step, tag_info_t info) return static_cast(nodes.size() - 1); } -regoff_t history_t::last(uint32_t i, size_t t) const -{ - regoff_t off = -1; - for (; i != HROOT; ) { - const node_t &n = nodes[i]; - if (n.info.idx == t) { - if (!n.info.neg) { - off = static_cast(n.step); - } - break; - } - i = n.pred; - } - return off; -} - void history_t::reconstruct(tag_path_t &path, uint32_t idx, uint32_t step) { path.clear(); -- 2.50.1