From 27c0562cbbe5355d3f5136ea79d6922df3d47b27 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sun, 27 Jan 2019 10:23:23 +0000 Subject: [PATCH] libre2c_posix: extended regex_t structure to hold more submatch data. Added fields: - re_nsub: total number of submatch groups, required by the POSIX standard - pmatch: buffer for submatch results, usually supplied by the user to regexec(), but we also allow the allocation and storage to be done in regex_t. This is convenient for users that have a hard time managing memory, e.g. Java bindings to libre2c_posix. - regs: buffer for internal use by regexec(), stored in regex_t to avoid repeated memory allocation on each call to regexec() with the same regex. --- re2c/libre2c_posix/regcomp.cc | 7 +++++++ re2c/libre2c_posix/regex-impl.h | 5 ++++- re2c/libre2c_posix/regexec.cc | 3 +-- re2c/libre2c_posix/regfree.cc | 4 +++- re2c/libre2c_posix/test.cpp | 3 +++ 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/re2c/libre2c_posix/regcomp.cc b/re2c/libre2c_posix/regcomp.cc index 5bd53ff0..356354e4 100644 --- a/re2c/libre2c_posix/regcomp.cc +++ b/re2c/libre2c_posix/regcomp.cc @@ -35,6 +35,7 @@ int regcomp(regex_t *preg, const char *pattern, int /* cflags */) arv.push_back(ar); RESpec re(arv, opt, warn, *preg->rmgr); + preg->char2class = new size_t[256]; split_charset(re); for (uint32_t i = 1, j = 0; i < re.charset.size(); ++i) { for (; j < re.charset[i]; ++j) { @@ -50,6 +51,12 @@ int regcomp(regex_t *preg, const char *pattern, int /* cflags */) dfa_t *dfa = new dfa_t(*nfa, opt, "", warn); +// compact_and_optimize_tags(opt, *dfa); + + DASSERT(dfa->rules.size() == 1); + preg->re_nsub = dfa->rules[0].ncap + 1; + preg->pmatch = new regmatch_t[preg->re_nsub]; + preg->regs = new regoff_t[dfa->maxtagver + 1]; preg->nfa = nfa; preg->dfa = dfa; diff --git a/re2c/libre2c_posix/regex-impl.h b/re2c/libre2c_posix/regex-impl.h index b2b0520c..7fac2352 100644 --- a/re2c/libre2c_posix/regex-impl.h +++ b/re2c/libre2c_posix/regex-impl.h @@ -17,10 
+17,13 @@ struct RangeMgr; struct regex_t { + size_t re_nsub; re2c::RangeMgr *rmgr; const re2c::nfa_t *nfa; const re2c::dfa_t *dfa; - size_t char2class[256]; + regmatch_t *pmatch; + regoff_t *regs; + size_t *char2class; }; #endif // _RE2C_LIB_REGEX_IMPL_ diff --git a/re2c/libre2c_posix/regexec.cc b/re2c/libre2c_posix/regexec.cc index 6e3f46f9..a20855fa 100644 --- a/re2c/libre2c_posix/regexec.cc +++ b/re2c/libre2c_posix/regexec.cc @@ -33,7 +33,7 @@ int regexec(const regex_t *preg, const char *string, size_t nmatch, { const dfa_t *dfa = preg->dfa; int result = REG_NOMATCH; - regoff_t *regs = new regoff_t[dfa->maxtagver + 1]; + regoff_t *regs = preg->regs; size_t i = 0; const char *p = string, *q = p; const dfa_state_t *s, *x = NULL; @@ -93,6 +93,5 @@ int regexec(const regex_t *preg, const char *string, size_t nmatch, } } - delete[] regs; return result; } diff --git a/re2c/libre2c_posix/regfree.cc b/re2c/libre2c_posix/regfree.cc index c4aeb579..2b85c161 100644 --- a/re2c/libre2c_posix/regfree.cc +++ b/re2c/libre2c_posix/regfree.cc @@ -9,8 +9,10 @@ using namespace re2c; void regfree(regex_t *preg) { delete preg->rmgr; - delete preg->nfa; + delete[] preg->pmatch; + delete[] preg->regs; + delete[] preg->char2class; const dfa_t *dfa = preg->dfa; delete &dfa->charset; diff --git a/re2c/libre2c_posix/test.cpp b/re2c/libre2c_posix/test.cpp index 4f4dbdce..7e29ade3 100644 --- a/re2c/libre2c_posix/test.cpp +++ b/re2c/libre2c_posix/test.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -39,6 +40,8 @@ static int test(const char *pattern, const char *string goto end; } + assert(nmatch == 0 || nmatch == re.re_nsub); + for (uint32_t i = 0; i < nmatch; ++i) { regoff_t so = submatch[2 * i]; regoff_t eo = submatch[2 * i + 1]; -- 2.40.0