From ee309d076f5534dfab57010f491d0e558d8c08c7 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sun, 5 Aug 2018 10:55:58 +0100 Subject: [PATCH] Compute and store tag "height" (needed for Okui disambiguation). --- re2c/src/nfa/dump.cc | 1 + re2c/src/re/ast_to_re.cc | 139 +++++++++++++++++---------------------- re2c/src/re/re.h | 4 -- re2c/src/re/tag.cc | 37 ++++++++++- re2c/src/re/tag.h | 28 ++++---- 5 files changed, 108 insertions(+), 101 deletions(-) diff --git a/re2c/src/nfa/dump.cc b/re2c/src/nfa/dump.cc index 9c92c7ec..3af97d3f 100644 --- a/re2c/src/nfa/dump.cc +++ b/re2c/src/nfa/dump.cc @@ -63,6 +63,7 @@ void dump_nfa(const nfa_t &nfa) } else { fprintf(stderr, "↑"); } + fprintf(stderr, "(%d)", tag.height); fprintf(stderr, "\"]\n"); break; } diff --git a/re2c/src/re/ast_to_re.cc b/re2c/src/re/ast_to_re.cc index d99714e1..fd7cb937 100644 --- a/re2c/src/re/ast_to_re.cc +++ b/re2c/src/re/ast_to_re.cc @@ -46,7 +46,33 @@ namespace re2c { * applies to subexpressions that have nested captures. */ -static bool has_tags(const AST *ast) +static bool has_tags(const AST *); +static RE *ast_to_re(RESpec &, const AST *, size_t &, uint32_t); +static RE *re_schar(RE::alc_t &, uint32_t, uint32_t, uint32_t, const opt_t *); +static RE *re_ichar(RE::alc_t &, uint32_t, uint32_t, uint32_t, const opt_t *); +static RE *re_class(RE::alc_t &, uint32_t, uint32_t, const Range *, const opt_t *, Warn &); +static void assert_tags_used_once(const Rule &, const std::vector &); +static void init_rule(Rule &, const Code *, const std::vector &, size_t, size_t); + + +RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) + : alc() + , res() + , charset(*new std::vector) + , tags(*new std::vector) + , rules(*new std::valarray(ast.size())) + , opts(o) + , warn(w) +{ + for (size_t i = 0; i < ast.size(); ++i) { + size_t ltag = tags.size(), ncap = 0; + res.push_back(ast_to_re(*this, ast[i].ast, ncap, 0)); + init_rule(rules[i], ast[i].code, tags, ltag, ncap); + } +} + + +bool has_tags(const AST *ast) { switch (ast->type) { case AST::NIL: @@ -65,48 +91,16 @@ static bool has_tags(const AST *ast) return false; /* unreachable */ } -static size_t fixlen(const AST *ast) -{ - switch (ast->type) { - case AST::NIL: - case AST::TAG: return 0; - case AST::CLS: - case AST::DOT: - case AST::DEFAULT: - case AST::DIFF: return 1; - case AST::STR: return ast->str.chars->size(); - case AST::ALT: { - const size_t - l1 = fixlen(ast->alt.ast1), - l2 = fixlen(ast->alt.ast2); - return l1 == l2 ? l1 : Tag::VARDIST; - } - case AST::CAT: { - const size_t - l1 = fixlen(ast->cat.ast1), - l2 = fixlen(ast->cat.ast2); - return l1 == Tag::VARDIST || l2 == Tag::VARDIST - ? Tag::VARDIST : l1 + l2; - } - case AST::REF: return fixlen(ast->ref.ast); - case AST::ITER: { - const size_t l = fixlen(ast->iter.ast); - const uint32_t m = ast->iter.min, n = ast->iter.max; - return l == Tag::VARDIST || m != n - ? Tag::VARDIST : l * (n - m); - } - case AST::CAP: return fixlen(ast->cap); - } - return Tag::VARDIST; /* unreachable */ -} -static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) +RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap, uint32_t height) { RE::alc_t &alc = spec.alc; std::vector &tags = spec.tags; const opt_t *opts = spec.opts; Warn &warn = spec.warn; + if (ast->type != AST::CAP && ast->type != AST::REF) ++height; + switch (ast->type) { case AST::NIL: return re_nil(alc); @@ -158,29 +152,29 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) && ast->alt.ast1->type != AST::CAP) { // see note [POSIX subexpression hierarchy] t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); } - x = ast_to_re(spec, ast->alt.ast1, ncap); + x = ast_to_re(spec, ast->alt.ast1, ncap, height); x = re_cat(alc, t1, re_cat(alc, x, t2)); if (opts->posix_captures && has_tags(ast) && ast->alt.ast2->type != AST::CAP) { // see note [POSIX subexpression hierarchy] t3 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); t4 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); } - y = ast_to_re(spec, ast->alt.ast2, ncap); + y = ast_to_re(spec, ast->alt.ast2, ncap, height); y = re_cat(alc, t3, re_cat(alc, y, t4)); return re_alt(alc, x, y); } case AST::DIFF: { - RE *x = ast_to_re(spec, ast->diff.ast1, ncap); - RE *y = ast_to_re(spec, ast->diff.ast2, ncap); + RE *x = ast_to_re(spec, ast->diff.ast1, ncap, height); + RE *y = ast_to_re(spec, ast->diff.ast2, ncap, height); if (x->type != RE::SYM || y->type != RE::SYM) { fatal_lc(ast->line, ast->column, "can only difference char sets"); } @@ -194,11 +188,11 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) && a1->type != AST::CAP) { // see note [POSIX subexpression hierarchy] t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); } - x = ast_to_re(spec, ast->cat.ast1, ncap); + x = ast_to_re(spec, ast->cat.ast1, ncap, height); x = re_cat(alc, t1, re_cat(alc, x, t2)); const AST *a2 = ast->alt.ast2; @@ -206,11 +200,11 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) && a2->type != AST::CAP) { // see note [POSIX subexpression hierarchy] t3 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); t4 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(Tag::FICTIVE, false)); + tags.push_back(Tag(Tag::FICTIVE, false, height)); } - y = ast_to_re(spec, ast->cat.ast2, ncap); + y = ast_to_re(spec, ast->cat.ast2, ncap, height); y = re_cat(alc, t3, re_cat(alc, y, t4)); return re_cat(alc, x, y); @@ -225,28 +219,28 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) "simple tags are not allowed with '--posix-captures' option"); } RE *t = re_tag(alc, tags.size(), false); - tags.push_back(Tag(ast->tag.name, ast->tag.history)); + tags.push_back(Tag(ast->tag.name, ast->tag.history, height)); return t; } case AST::CAP: { if (!opts->posix_captures) { - return ast_to_re(spec, ast->cap, ncap); + return ast_to_re(spec, ast->cap, ncap, height); } const AST *x = ast->cap; if (x->type == AST::REF) x = x->ref.ast; RE *t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap, false)); + tags.push_back(Tag(2 * ncap, false, height)); RE *t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap + 1, false)); + tags.push_back(Tag(2 * ncap + 1, false, height)); ++ncap; - return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap), t2)); + return re_cat(alc, t1, re_cat(alc, ast_to_re(spec, x, ncap, height), t2)); } case AST::REF: if (!opts->posix_captures) { - return ast_to_re(spec, ast->ref.ast, ncap); + return ast_to_re(spec, ast->ref.ast, ncap, height); } fatal_l(ast->line, "implicit grouping is forbidden with '--posix-captures'" @@ -266,10 +260,10 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) if (x->type == AST::REF) x = x->ref.ast; t1 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap, m > 1)); + tags.push_back(Tag(2 * ncap, m > 1, height)); t2 = re_tag(alc, tags.size(), false); - tags.push_back(Tag(2 * ncap + 1, false)); + tags.push_back(Tag(2 * ncap + 1, m > 1, height)); ++ncap; } @@ -278,10 +272,10 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) if (m == 0) { y = re_cat(alc, t1, t2); } else if (m == 1) { - y = ast_to_re(spec, x, ncap); + y = ast_to_re(spec, x, ncap, height); y = re_cat(alc, t1, re_cat(alc, y, t2)); } else { - y = ast_to_re(spec, x, ncap); + y = ast_to_re(spec, x, ncap, height); y = re_cat(alc, t1, y); y = re_cat(alc, y, t2); y = re_iter(alc, y, n1, m); @@ -295,6 +289,7 @@ static RE *ast_to_re(RESpec &spec, const AST *ast, size_t &ncap) return NULL; /* unreachable */ } + RE *re_schar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts) { if (!opts->encoding.encode(c)) { @@ -314,6 +309,7 @@ RE *re_schar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const o return NULL; /* unreachable */ } + RE *re_ichar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts) { if (is_alpha(c)) { @@ -325,6 +321,7 @@ RE *re_ichar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const o } } + RE *re_class(RE::alc_t &alc, uint32_t line, uint32_t column, const Range *r, const opt_t *opts, Warn &warn) { if (!r) { @@ -353,7 +350,8 @@ RE *re_class(RE::alc_t &alc, uint32_t line, uint32_t column, const Range *r, con return NULL; /* unreachable */ } -static void assert_tags_used_once(const Rule &rule, const std::vector &tags) + +void assert_tags_used_once(const Rule &rule, const std::vector &tags) { std::set names; const std::string *name = NULL; @@ -368,7 +366,8 @@ static void assert_tags_used_once(const Rule &rule, const std::vector &tags } } -static void init_rule(Rule &rule, const Code *code, const std::vector &tags, + +void init_rule(Rule &rule, const Code *code, const std::vector &tags, size_t ltag, size_t ncap) { rule.code = code; @@ -379,20 +378,4 @@ static void init_rule(Rule &rule, const Code *code, const std::vector &tags assert_tags_used_once(rule, tags); } -RESpec::RESpec(const std::vector &ast, const opt_t *o, Warn &w) - : alc() - , res() - , charset(*new std::vector) - , tags(*new std::vector) - , rules(*new std::valarray(ast.size())) - , opts(o) - , warn(w) -{ - for (size_t i = 0; i < ast.size(); ++i) { - size_t ltag = tags.size(), ncap = 0; - res.push_back(ast_to_re(*this, ast[i].ast, ncap)); - init_rule(rules[i], ast[i].code, tags, ltag, ncap); - } -} - } // namespace re2c diff --git a/re2c/src/re/re.h b/re2c/src/re/re.h index 305c28b4..623f78f0 100644 --- a/re2c/src/re/re.h +++ b/re2c/src/re/re.h @@ -119,10 +119,6 @@ inline RE *re_tag(RE::alc_t &alc, size_t i, bool b) return x; } -RE *re_schar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts); -RE *re_ichar(RE::alc_t &alc, uint32_t line, uint32_t column, uint32_t c, const opt_t *opts); -RE *re_class(RE::alc_t &alc, uint32_t line, uint32_t column, const Range *r, const opt_t *opts, Warn &warn); - } // namespace re2c #endif // _RE2C_RE_RE_ diff --git a/re2c/src/re/tag.cc b/re2c/src/re/tag.cc index 9e6800e9..399bcb79 100644 --- a/re2c/src/re/tag.cc +++ b/re2c/src/re/tag.cc @@ -1,12 +1,45 @@ -#include - #include "src/re/tag.h" +#include +#include namespace re2c { +static uint32_t fix_height(size_t, uint32_t); + + const size_t Tag::RIGHTMOST = std::numeric_limits::max(); const size_t Tag::VARDIST = std::numeric_limits::max(); const size_t Tag::FICTIVE = Tag::RIGHTMOST - 1; + +Tag::Tag(const std::string *nm, bool hi, uint32_t ht) + : name(nm) + , ncap(Tag::RIGHTMOST) + , base(Tag::RIGHTMOST) + , dist(Tag::VARDIST) + , history(hi) + , orbit(false) + , height(fix_height(ncap, ht)) +{} + + +Tag::Tag(size_t nc, bool ob, uint32_t ht) + : name(NULL) + , ncap(nc) + , base(Tag::RIGHTMOST) + , dist(Tag::VARDIST) + , history(false) + , orbit(ob) + , height(fix_height(ncap, ht)) +{} + + +uint32_t fix_height(size_t ncap, uint32_t height) +{ + // height of the closing parenthesis is one less + // than height of the opening parenthesis + return ncap % 2 == 0 ? height + 1 : height; +} + } // namespace re2c diff --git a/re2c/src/re/tag.h b/re2c/src/re/tag.h index 9a9a05e4..24b25305 100644 --- a/re2c/src/re/tag.h +++ b/re2c/src/re/tag.h @@ -5,17 +5,18 @@ #include "src/util/c99_stdint.h" #include #include -#include namespace re2c { typedef int32_t tagver_t; + static const tagver_t TAGVER_BOTTOM = std::numeric_limits::min(); // default value, lowest priority static const tagver_t TAGVER_ZERO = 0; // absense of tag static const tagver_t TAGVER_CURSOR = std::numeric_limits::max(); // current position, highest priority + struct Tag { static const size_t RIGHTMOST; @@ -28,50 +29,43 @@ struct Tag size_t dist; bool history; bool orbit; + uint32_t height; - Tag(const std::string *n, bool h) - : name(n) - , ncap(Tag::RIGHTMOST) - , base(Tag::RIGHTMOST) - , dist(Tag::VARDIST) - , history(h) - , orbit(false) - {} - Tag(size_t c, bool o) - : name(NULL) - , ncap(c) - , base(Tag::RIGHTMOST) - , dist(Tag::VARDIST) - , history(false) - , orbit(o) - {} + Tag(const std::string *nm, bool hi, uint32_t ht); + Tag(size_t nc, bool ob, uint32_t ht); }; + inline bool fixed(const Tag &tag) { return tag.dist != Tag::VARDIST; } + inline bool fictive(const Tag &tag) { return tag.ncap == Tag::FICTIVE; } + inline bool capture(const Tag &tag) { return tag.ncap != Tag::RIGHTMOST; } + inline bool orbit(const Tag &tag) { return tag.orbit; } + inline bool trailing(const Tag &tag) { return !capture(tag) && tag.name == NULL; } + inline bool history(const Tag &tag) { return tag.history; -- 2.40.0