From 4fe8e273b8af6cbd6420e9466d49008051dc3c01 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Fri, 3 Mar 2017 01:47:03 +0000 Subject: [PATCH] Now code generation can handle zero-offset fixed tags with generic API. We'll need this for POSIX captures: in the presence of orbit tag, opening tag is needed only for disambiguation; after determinization we can pretend that it is fixed on orbit tag with zero offset. This will save some time and space on tag optimizations and maybe reduce the number of tag variables. --- re2c/src/codegen/emit_action.cc | 60 +++++++++++++++++++-------------- re2c/src/ir/nfa/dump.cc | 8 ++--- re2c/src/ir/re/fixed_tags.cc | 7 ++-- re2c/src/ir/re/nullable.cc | 2 +- re2c/src/ir/tag.h | 5 +++ 5 files changed, 48 insertions(+), 34 deletions(-) diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index e26dc5d8..c57f5516 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -361,6 +361,7 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) const std::string &prefix = opts->tags_prefix, &expression = opts->tags_expression; + std::string name, expr; const std::vector &tags = dfa.tags; const tagver_t *fins = dfa.finvers; @@ -369,27 +370,23 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) const Tag &tag = tags[t]; if (fixed(tag)) continue; + expr = vartag_expr(fins[t], prefix, expression); + o.wind(ind); - if (tag.name) { - const std::string - name = *tag.name, - expr = vartag_expr(fins[t], prefix, expression); - if (generic) { - o.wstring(opts->yycopytag).ws(" (").wstring(name) + if (generic) { + if (!trailing(tag)) { + o.wstring(opts->yycopytag).ws(" (").wstring(*tag.name) .ws(", ").wstring(expr).ws(")"); - } else { - o.wstring(name).ws(" = ").wstring(expr); - } - } else if (dfa.oldstyle_ctxmarker) { - if (generic) { + } else if (dfa.oldstyle_ctxmarker) { o.wstring(opts->yyrestorectx).ws(" ()"); } else { - 
o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker); + o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); } } else { - const std::string expr = vartag_expr(fins[t], prefix, expression); - if (generic) { - o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); + if (!trailing(tag)) { + o.wstring(*tag.name).ws(" = ").wstring(expr); + } else if (dfa.oldstyle_ctxmarker) { + o.wstring(opts->yycursor).ws(" = ").wstring(opts->yyctxmarker); } else { o.wstring(opts->yycursor).ws(" = ").wstring(expr); } @@ -401,21 +398,32 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) for (size_t t = rule.ltag; t < rule.htag; ++t) { const Tag &tag = tags[t]; if (!fixed(tag)) continue; - assert(!generic); + + const size_t dist = tag.dist; + const bool fixed_on_cursor = tag.base == Tag::RIGHTMOST; + expr = fixed_on_cursor ? opts->yycursor + : vartag_expr(fins[tag.base], prefix, expression); o.wind(ind); - if (tag.name) { - o.wstring(*tag.name).ws(" = "); - if (tag.base == Tag::RIGHTMOST) { - // optimize '(YYCTXMARKER + ((YYCURSOR - YCTXMARKER) - tag))' - // to '(YYCURSOR - tag)' - o.wstring(opts->yycursor).ws(" - ").wu64(tag.dist); - } else { - o.wstring(vartag_expr(fins[tag.base], prefix, expression)) - .ws(" - ").wu64(tag.dist); + if (generic) { + assert(dist == 0); + if (!trailing(tag)) { + o.wstring(opts->yycopytag).ws(" (").wstring(*tag.name) + .ws(", ").wstring(expr).ws(")"); + } else if (!fixed_on_cursor) { + assert(!dfa.oldstyle_ctxmarker); + o.wstring(opts->yyrestoretag).ws(" (").wstring(expr).ws(")"); } } else { - o.wstring(opts->yycursor).ws(" -= ").wu64(tag.dist); + if (!trailing(tag)) { + o.wstring(*tag.name).ws(" = ").wstring(expr); + if (dist > 0) o.ws(" - ").wu64(dist); + } else if (!fixed_on_cursor) { + o.wstring(opts->yycursor).ws(" = ").wstring(expr); + if (dist > 0) o.ws(" - ").wu64(dist); + } else if (dist > 0) { + o.wstring(opts->yycursor).ws(" -= ").wu64(dist); + } } o.ws(";\n"); } diff --git 
a/re2c/src/ir/nfa/dump.cc b/re2c/src/ir/nfa/dump.cc index c0b32ee8..be46fe04 100644 --- a/re2c/src/ir/nfa/dump.cc +++ b/re2c/src/ir/nfa/dump.cc @@ -46,12 +46,12 @@ void dump_nfa(const nfa_t &nfa) break; } case nfa_state_t::TAG: { - const std::string *name = nfa.tags[n->tag.info].name; + const Tag &tag = nfa.tags[n->tag.info]; fprintf(stderr, " n%u -> n%u [label=\"/", i, index(nfa, n->tag.out)); - if (name) { - fprintf(stderr, "%s", name->c_str()); - } else { + if (trailing(tag)) { fprintf(stderr, "%u", (uint32_t)n->rule); + } else { + fprintf(stderr, "%s", tag.name->c_str()); } if (n->tag.bottom) { fprintf(stderr, "↓"); diff --git a/re2c/src/ir/re/fixed_tags.cc b/re2c/src/ir/re/fixed_tags.cc index 5f055919..41a19268 100644 --- a/re2c/src/ir/re/fixed_tags.cc +++ b/re2c/src/ir/re/fixed_tags.cc @@ -39,9 +39,9 @@ static void find_fixed_tags(RE *re, std::vector &tags, find_fixed_tags(re->iter.re, tags, dist, base, false); dist = Tag::VARDIST; break; - case RE::TAG: + case RE::TAG: { + Tag &tag = tags[re->tag.idx]; if (toplevel && dist != Tag::VARDIST) { - Tag &tag = tags[re->tag.idx]; tag.base = base; tag.dist = dist; } else { @@ -50,8 +50,9 @@ static void find_fixed_tags(RE *re, std::vector &tags, dist = 0; } } - if (tags[re->tag.idx].name == NULL) dist = 0; + if (trailing(tag)) dist = 0; break; + } } } diff --git a/re2c/src/ir/re/nullable.cc b/re2c/src/ir/re/nullable.cc index 783e931a..b29ba17b 100644 --- a/re2c/src/ir/re/nullable.cc +++ b/re2c/src/ir/re/nullable.cc @@ -13,7 +13,7 @@ static bool nullable(const RESpec &spec, const RE *re, bool &trail) case RE::ITER: return nullable(spec, re->iter.re, trail); case RE::TAG: - trail |= spec.tags[re->tag.idx].name == NULL; + trail |= trailing(spec.tags[re->tag.idx]); return true; case RE::ALT: return nullable(spec, re->alt.re1, trail) diff --git a/re2c/src/ir/tag.h b/re2c/src/ir/tag.h index 50875224..76608377 100644 --- a/re2c/src/ir/tag.h +++ b/re2c/src/ir/tag.h @@ -30,6 +30,11 @@ inline bool fixed(const Tag &tag) return 
tag.dist != Tag::VARDIST; } +inline bool trailing(const Tag &tag) +{ + return tag.name == NULL; +} + } // namespace re2c #endif // _RE2C_IR_TAG_ -- 2.40.0