From 3f5fa5cc4e430561bea1453f2e0eabf402e7a57a Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 4 Aug 2018 10:44:56 +0100 Subject: [PATCH] Always add structural tags to the RHS of alternative/catenation in POSIX captures. (Preliminary work before switching from Kuklewicz POSIX disambiguation algorithm to Okui algorithm.) --- re2c/src/re/ast_to_re.cc | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/re2c/src/re/ast_to_re.cc b/re2c/src/re/ast_to_re.cc index c1a13a99..d99714e1 100644 --- a/re2c/src/re/ast_to_re.cc +++ b/re2c/src/re/ast_to_re.cc @@ -152,7 +152,8 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) // see note [default regexp] return re_sym(alc, Range::ran(0, opts->encoding.nCodeUnits())); case AST::ALT: { - RE *t1 = NULL, *t2 = NULL, *x, *y; + RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; + if (opts->posix_captures && has_tags(ast) && ast->alt.ast1->type != AST::CAP) { // see note [POSIX subexpression hierarchy] @@ -163,7 +164,18 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) } x = ast_to_re(spec, ast->alt.ast1, ncap); x = re_cat(alc, t1, re_cat(alc, x, t2)); + + if (opts->posix_captures && has_tags(ast) + && ast->alt.ast2->type != AST::CAP) { + // see note [POSIX subexpression hierarchy] + t3 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false)); + t4 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false)); + } y = ast_to_re(spec, ast->alt.ast2, ncap); + y = re_cat(alc, t3, re_cat(alc, y, t4)); + return re_alt(alc, x, y); } case AST::DIFF: { @@ -175,10 +187,11 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) return re_class(alc, ast->line, ast->column, Range::sub(x->sym, y->sym), opts, warn); } case AST::CAT: { - RE *t1 = NULL, *t2 = NULL, *x, *y; + RE *t1 = NULL, *t2 = NULL, *t3 = NULL, *t4 = NULL, *x, *y; + const AST *a1 = ast->alt.ast1; if (opts->posix_captures && has_tags(ast) - && a1->type != AST::CAP && fixlen(a1) == Tag::VARDIST) { + && a1->type != AST::CAP) { // see note [POSIX subexpression hierarchy] t1 = re_tag(alc, tags.size(), false); tags.push_back(Tag(Tag::FICTIVE, false)); @@ -187,7 +200,19 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) } x = ast_to_re(spec, ast->cat.ast1, ncap); x = re_cat(alc, t1, re_cat(alc, x, t2)); + + const AST *a2 = ast->alt.ast2; + if (opts->posix_captures && has_tags(ast) + && a2->type != AST::CAP) { + // see note [POSIX subexpression hierarchy] + t3 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false)); + t4 = re_tag(alc, tags.size(), false); + tags.push_back(Tag(Tag::FICTIVE, false)); + } y = ast_to_re(spec, ast->cat.ast2, ncap); + y = re_cat(alc, t3, re_cat(alc, y, t4)); + return re_cat(alc, x, y); } case AST::TAG: { -- 2.40.0