From 2b7aec358e324999d30520c542414591a2838710 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 5 Oct 2016 17:26:28 +0100 Subject: [PATCH] Backup overwritten tags in fallback states. We need to backup tags in fallback states: these tags may be updated on some non-accepting path from fallback state, and if the attempt to match longer rule fails, there's no way to restore the overwritten tags. Such tags may appear in self-overlapping rules, e.g.: (@p "ab")* {} We create a special backup variable for each potentially overwritten tag. Backup tags do not participate in deduplication. --- re2c/Makefile.am | 1 + re2c/src/codegen/bitmap.cc | 4 +- re2c/src/codegen/emit.h | 10 +-- re2c/src/codegen/emit_action.cc | 64 +++++++++++------ re2c/src/codegen/emit_dfa.cc | 21 +++++- re2c/src/codegen/go.h | 18 ++--- re2c/src/codegen/go_construct.cc | 6 +- re2c/src/codegen/go_emit.cc | 14 ++-- re2c/src/ir/adfa/action.h | 2 +- re2c/src/ir/adfa/adfa.cc | 7 +- re2c/src/ir/adfa/adfa.h | 7 +- re2c/src/ir/adfa/prepare.cc | 15 ++-- re2c/src/ir/compile.cc | 2 + re2c/src/ir/dfa/dead_rules.cc | 2 +- re2c/src/ir/dfa/determinization.cc | 5 +- re2c/src/ir/dfa/dfa.h | 10 +-- re2c/src/ir/dfa/fallback_tags.cc | 101 +++++++++++++++++++++++++++ re2c/src/ir/dfa/minimization.cc | 6 +- re2c/src/ir/dfa/tag_deduplication.cc | 21 +++--- re2c/src/ir/skeleton/skeleton.cc | 5 +- re2c/src/ir/tag.h | 31 ++++++++ re2c/test/tags/fallback3.i--tags.c | 73 +++++++++++++++++++ re2c/test/tags/fallback3.i--tags.re | 11 +++ 23 files changed, 354 insertions(+), 82 deletions(-) create mode 100644 re2c/src/ir/dfa/fallback_tags.cc create mode 100644 re2c/test/tags/fallback3.i--tags.c create mode 100644 re2c/test/tags/fallback3.i--tags.re diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 1a5cbbd0..ba1b4c00 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -94,6 +94,7 @@ SRC = \ src/ir/dfa/closure.cc \ src/ir/dfa/dead_rules.cc \ src/ir/dfa/determinization.cc \ + src/ir/dfa/fallback_tags.cc \ src/ir/dfa/fillpoints.cc \ src/ir/dfa/find_state.cc \ src/ir/dfa/minimization.cc \ diff --git a/re2c/src/codegen/bitmap.cc b/re2c/src/codegen/bitmap.cc index b4c3e21d..173a121b 100644 --- a/re2c/src/codegen/bitmap.cc +++ b/re2c/src/codegen/bitmap.cc @@ -160,8 +160,8 @@ bool matches(const Span * b1, uint32_t n1, const State * s1, const Span * b2, ui // might go to the same state, but have different tag sets if (lb1 != lb2 || b1->ub != b2->ub - || b1->tags != ZERO_TAGS - || b2->tags != ZERO_TAGS) + || !b1->tags.empty() + || !b2->tags.empty()) { return false; } diff --git a/re2c/src/codegen/emit.h b/re2c/src/codegen/emit.h index 07f06b96..ff105bff 100644 --- a/re2c/src/codegen/emit.h +++ b/re2c/src/codegen/emit.h @@ -9,14 +9,16 @@ namespace re2c { void emit_action(OutputFile &o, uint32_t ind, bool &readCh, const DFA &dfa, const State *s, const std::set &used_labels); void gen_goto(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags); + const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback); void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags); + const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback); void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags); -void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags); + const State *to, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback); +void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, const tagcmd_t &tags, bool restore_fallback); std::string vartag_name(const std::string *name, size_t rule); std::string vartag_expr(const std::string *name, size_t rule); +std::string vartag_name_fallback(const Tag &tag); +std::string vartag_expr_fallback(const Tag &tag); std::string subst_tags(const std::string &action, const std::valarray &tags, size_t ltag, size_t htag); diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index a29ca408..fb1a5d40 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -140,7 +140,7 @@ void emit_accept_binary(OutputFile &o, uint32_t ind, bool &readCh, o.wind(--ind).ws("}\n"); } else { const accept_t &acc = *s->action.info.accepts; - gen_goto(o, ind, readCh, acc[l].first, dfa, acc[l].second); + gen_goto(o, ind, readCh, acc[l].first, dfa, acc[l].second, true); } } @@ -162,13 +162,13 @@ void emit_accept(OutputFile &o, uint32_t ind, bool &readCh, // only one possible 'yyaccept' value: unconditional jump if (nacc == 1) { - gen_goto(o, ind, readCh, acc[0].first, dfa, acc[0].second); + gen_goto(o, ind, readCh, acc[0].first, dfa, acc[0].second, true); return; } bool have_tags = false; for (size_t i = 0; i < nacc; ++i) { - if (acc[i].second != 0) { + if (!acc[i].second.empty()) { have_tags = true; break; } @@ -202,10 +202,10 @@ void emit_accept(OutputFile &o, uint32_t ind, bool &readCh, o.wind(ind).ws("switch (").wstring(opts->yyaccept).ws(") {\n"); for (uint32_t i = 0; i < nacc - 1; ++i) { o.wind(ind).ws("case ").wu32(i).ws(": "); - gen_goto_case(o, ind, readCh, acc[i].first, dfa, acc[i].second); + gen_goto_case(o, ind, readCh, acc[i].first, dfa, acc[i].second, true); } o.wind(ind).ws("default:"); - gen_goto_case(o, ind, readCh, acc[nacc - 1].first, dfa, acc[nacc - 1].second); + gen_goto_case(o, ind, readCh, acc[nacc - 1].first, dfa, acc[nacc - 1].second, true); o.wind(ind).ws("}\n"); } @@ -317,36 +317,39 @@ void genSetState(OutputFile &o, uint32_t ind, uint32_t fillIndex) } void gen_goto_case(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags) + const State *to, const DFA &dfa, const tagcmd_t &tags, + bool restore_fallback) { - const bool multiline = readCh || (tags != ZERO_TAGS); + const bool multiline = readCh || !tags.empty(); if (multiline) { o.ws("\n"); - gen_goto(o, ind + 1, readCh, to, dfa, tags); + gen_goto(o, ind + 1, readCh, to, dfa, tags, restore_fallback); } else { - gen_goto(o, 1, readCh, to, dfa, tags); + gen_goto(o, 1, readCh, to, dfa, tags, restore_fallback); } } void gen_goto_if(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags) + const State *to, const DFA &dfa, const tagcmd_t &tags, + bool restore_fallback) { const int32_t linecount = (readCh && to != NULL) - + (tags != ZERO_TAGS) + + !tags.empty() + (to != NULL); if (linecount > 1) { o.ws("{\n"); - gen_goto(o, ind + 1, readCh, to, dfa, tags); + gen_goto(o, ind + 1, readCh, to, dfa, tags, restore_fallback); o.wind(ind).ws("}\n"); } else { - gen_goto(o, 0, readCh, to, dfa, tags); + gen_goto(o, 0, readCh, to, dfa, tags, restore_fallback); } } void gen_goto(OutputFile &o, uint32_t ind, bool &readCh, - const State *to, const DFA &dfa, size_t tags) + const State *to, const DFA &dfa, const tagcmd_t &tags, + bool restore_fallback) { if (to == NULL) { readCh = false; @@ -355,23 +358,40 @@ void gen_goto(OutputFile &o, uint32_t ind, bool &readCh, o.wstring(opts->input_api.stmt_peek(ind)); readCh = false; } - gen_settags(o, ind, dfa, tags); + gen_settags(o, ind, dfa, tags, restore_fallback); if (to) { o.wind(ind).ws("goto ").wstring(opts->labelPrefix) .wlabel(to->label).ws(";\n"); } } -void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, size_t tags) +void gen_settags(OutputFile &o, uint32_t ind, const DFA &dfa, + const tagcmd_t &cmd, bool restore_fallback) { - if (tags != ZERO_TAGS) { - if (dfa.basetag) { - o.wstring(opts->input_api.stmt_dist(ind, - dfa.tagpool[tags], dfa.tags)); - } else { - o.wstring(opts->input_api.stmt_backupctx(ind)); + if (cmd.empty()) return; + + if (!dfa.basetag) { + assert(cmd.copy == ZERO_TAGS); + o.wstring(opts->input_api.stmt_backupctx(ind)); + return; + } + + const std::valarray &tags = dfa.tags; + const size_t ntags = tags.size(); + const bool *copy = dfa.tagpool[cmd.copy]; + for (size_t t = 0; t < ntags; ++t) { + if (copy[t]) { + const Tag &tag = tags[t]; + std::string + x = vartag_expr(tag.name, tag.rule), + y = vartag_expr_fallback(tag); + if (restore_fallback) std::swap(x, y); + o.wind(ind).wstring(y).ws(" = ").wstring(x).ws(";\n"); } } + if (cmd.set != ZERO_TAGS) { + o.wstring(opts->input_api.stmt_dist(ind, dfa.tagpool[cmd.set], tags)); + } } } // namespace re2c diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 5c220d6b..e6146b36 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -145,12 +145,19 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra std::set tagnames; if (basetag) { - for (size_t i = 0; i < tags.size(); ++i) { + const size_t ntags = tags.size(); + for (size_t i = 0; i < ntags; ++i) { const Tag &t = tags[i]; if (t.type == Tag::VAR && t.var.orig == i) { tagnames.insert(vartag_name(t.name, t.rule)); } } + const bool *copy = tagpool[copy_tags]; + for (size_t i = 0; i < ntags; ++i) { + if (copy[i]) { + tagnames.insert(vartag_name_fallback(tags[i])); + } + } ob.tags.insert(tagnames.begin(), tagnames.end()); } @@ -384,4 +391,16 @@ std::string vartag_expr(const std::string *name, size_t rule) return e; } +std::string vartag_name_fallback(const Tag &tag) +{ + const std::string name = (tag.name == NULL ? "" : *tag.name) + "_"; + return vartag_name(&name, tag.rule); +} + +std::string vartag_expr_fallback(const Tag &tag) +{ + const std::string name = (tag.name == NULL ? "" : *tag.name) + "_"; + return vartag_expr(&name, tag.rule); +} + } // end namespace re2c diff --git a/re2c/src/codegen/go.h b/re2c/src/codegen/go.h index defa6a18..b1ace9bb 100644 --- a/re2c/src/codegen/go.h +++ b/re2c/src/codegen/go.h @@ -6,7 +6,7 @@ #include #include "src/codegen/output.h" -#include "src/ir/tagpool.h" +#include "src/ir/tag.h" #include "src/util/c99_stdint.h" #include "src/util/forbid_copy.h" @@ -22,7 +22,7 @@ struct Span { uint32_t ub; State * to; - size_t tags; + tagcmd_t tags; FORBID_COPY (Span); }; @@ -31,10 +31,10 @@ struct Case { std::vector > ranges; const State *to; - size_t tags; + tagcmd_t tags; void emit(OutputFile &o, uint32_t ind) const; - inline Case(): ranges(), to(NULL), tags(ZERO_TAGS) {} + inline Case(): ranges(), to(NULL), tags() {} FORBID_COPY(Case); }; @@ -43,7 +43,7 @@ struct Cases Case *cases; uint32_t cases_size; - void add(uint32_t lb, uint32_t ub, State *to, size_t tags); + void add(uint32_t lb, uint32_t ub, State *to, const tagcmd_t &tags); Cases(const Span *spans, uint32_t nspans); ~Cases(); void emit(OutputFile &o, uint32_t ind, const DFA &dfa, bool &readCh) const; @@ -77,10 +77,10 @@ struct Linear { const Cond *cond; const State *to; - size_t tags; + tagcmd_t tags; - Branch(): cond(NULL), to(NULL), tags(ZERO_TAGS) {} - void init(const Cond *c, const State *s, size_t ts) + Branch(): cond(NULL), to(NULL), tags() {} + void init(const Cond *c, const State *s, const tagcmd_t &ts) { cond = c; to = s; @@ -191,7 +191,7 @@ struct Go { uint32_t nSpans; // number of spans Span * span; - size_t tags; + tagcmd_t tags; enum { EMPTY, diff --git a/re2c/src/codegen/go_construct.cc b/re2c/src/codegen/go_construct.cc index e602e5ea..ba32af84 100644 --- a/re2c/src/codegen/go_construct.cc +++ b/re2c/src/codegen/go_construct.cc @@ -35,7 +35,7 @@ Cases::Cases(const Span *spans, uint32_t nspans) } } -void Cases::add(uint32_t lb, uint32_t ub, State *to, size_t tags) +void Cases::add(uint32_t lb, uint32_t ub, State *to, const tagcmd_t &tags) { for (uint32_t i = 0; i < cases_size; ++i) { Case &c = cases[i]; @@ -186,7 +186,7 @@ Dot::Dot (const Span * sp, uint32_t nsp, const State * s) Go::Go () : nSpans (0) , span (NULL) - , tags (ZERO_TAGS) + , tags () , type (EMPTY) , info () {} @@ -213,7 +213,7 @@ void Go::init (const State * from) bool low_spans_have_tags = false; for (uint32_t i = 0; i < nSpans - hSpans; ++i) { - if (span[i].tags != ZERO_TAGS) { + if (!span[i].tags.empty()) { low_spans_have_tags = true; break; } diff --git a/re2c/src/codegen/go_emit.cc b/re2c/src/codegen/go_emit.cc index 1593d826..f31783ab 100644 --- a/re2c/src/codegen/go_emit.cc +++ b/re2c/src/codegen/go_emit.cc @@ -87,13 +87,13 @@ void Cases::emit(OutputFile &o, uint32_t ind, const DFA &dfa, bool &readCh) cons for (uint32_t i = 1; i < cases_size; ++i) { const Case &c = cases[i]; c.emit(o, ind); - gen_goto_case(o, ind, readCh, c.to, dfa, c.tags); + gen_goto_case(o, ind, readCh, c.to, dfa, c.tags, false); } // default case must be the last one const Case &c = cases[0]; o.wind(ind).ws("default:"); - gen_goto_case(o, ind, readCh, c.to, dfa, c.tags); + gen_goto_case(o, ind, readCh, c.to, dfa, c.tags, false); o.wind(ind).ws("}\n"); } @@ -115,9 +115,9 @@ void Linear::emit(OutputFile &o, uint32_t ind, const DFA &dfa, bool &readCh) const Cond *cond = b.cond; if (cond) { output_if(o, ind, readCh, cond->compare, cond->value); - gen_goto_if(o, ind, readCh, b.to, dfa, b.tags); + gen_goto_if(o, ind, readCh, b.to, dfa, b.tags, false); } else { - gen_goto(o, ind, readCh, b.to, dfa, b.tags); + gen_goto(o, ind, readCh, b.to, dfa, b.tags, false); } } } @@ -151,7 +151,7 @@ void GoBitmap::emit (OutputFile & o, uint32_t ind, const DFA &dfa, bool & readCh o.wu32(bitmap->m); } o.ws(") {\n"); - gen_goto(o, ind + 1, readCh, bitmap_state, dfa, 0); + gen_goto(o, ind + 1, readCh, bitmap_state, dfa, tagcmd_t(), false); o.wind(ind).ws("}\n"); if (lgo != NULL) { @@ -218,7 +218,7 @@ void Dot::emit(OutputFile &o, const DFA &dfa) for (uint32_t j = 0; j < c.ranges.size(); ++j) { o.wrange(c.ranges[j].first, c.ranges[j].second); } - const bool *tags = dfa.tagpool[c.tags]; + const bool *tags = dfa.tagpool[c.tags.set]; for (size_t j = 0; j < dfa.tagpool.ntags; ++j) { if (tags[j]) { const Tag &t = dfa.tags[dfa.tags[j].var.orig]; @@ -232,7 +232,7 @@ void Dot::emit(OutputFile &o, const DFA &dfa) void Go::emit (OutputFile & o, uint32_t ind, const DFA &dfa, bool & readCh) { - gen_settags(o, ind, dfa, tags); + gen_settags(o, ind, dfa, tags, false); switch (type) { case EMPTY: break; diff --git a/re2c/src/ir/adfa/action.h b/re2c/src/ir/adfa/action.h index 8d3a21e1..5b6a9c21 100644 --- a/re2c/src/ir/adfa/action.h +++ b/re2c/src/ir/adfa/action.h @@ -24,7 +24,7 @@ struct Initial {} }; -typedef uniq_vector_t > accept_t; +typedef uniq_vector_t > accept_t; class Action { diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 73cdf3ed..ce31cb5e 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -37,6 +37,7 @@ DFA::DFA , rules(dfa.rules) , tags(dfa.tags) , tagpool(dfa.tagpool) + , copy_tags(dfa.copy_tags) // statistics , max_fill (0) @@ -57,7 +58,9 @@ DFA::DFA // for all of them) // If generic API is used, fixed-length contexts are forbidden, // which may cause additional overlaps. - , basetag (used_tags > 1 || opts->tags) + , basetag (used_tags > 1 + || dfa.copy_tags != ZERO_TAGS + || opts->tags) { const size_t nstates = dfa.states.size(); const size_t nchars = dfa.nchars; @@ -88,7 +91,7 @@ DFA::DFA for (uint32_t c = 0; c < nchars; ++j) { const size_t to = t->arcs[c]; - const size_t tags = t->tags[c]; + const tagcmd_t tags = t->tags[c]; for (;++c < nchars && t->arcs[c] == to && t->tags[c] == tags;); s->go.span[j].to = to == dfa_t::NIL ? NULL : i2s[to]; s->go.span[j].ub = charset[c]; diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 433988fe..45247fda 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -12,7 +12,7 @@ #include "src/ir/adfa/action.h" #include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" -#include "src/ir/tagpool.h" +#include "src/ir/tag.h" #include "src/util/forbid_copy.h" namespace re2c @@ -31,7 +31,7 @@ struct State bool fallback; size_t rule; - size_t rule_tags; + tagcmd_t rule_tags; bool isBase; Go go; Action action; @@ -42,7 +42,7 @@ struct State , fill (0) , fallback (false) , rule (Rule::NONE) - , rule_tags (0) + , rule_tags () , isBase (false) , go () , action () @@ -69,6 +69,7 @@ struct DFA std::valarray &rules; std::valarray &tags; Tagpool &tagpool; + size_t copy_tags; size_t max_fill; bool need_backup; bool need_backupctx; diff --git a/re2c/src/ir/adfa/prepare.cc b/re2c/src/ir/adfa/prepare.cc index 517568ea..9b8a29fd 100644 --- a/re2c/src/ir/adfa/prepare.cc +++ b/re2c/src/ir/adfa/prepare.cc @@ -25,7 +25,7 @@ void DFA::split(State *s) s->go.span = allocate (1); s->go.span[0].ub = ubChar; s->go.span[0].to = move; - s->go.span[0].tags = ZERO_TAGS; + s->go.span[0].tags = tagcmd_t(); } static uint32_t merge(Span *x, State *fg, State *bg) @@ -39,7 +39,7 @@ static uint32_t merge(Span *x, State *fg, State *bg) for (;!(f == fe && b == be);) { if (f->to == b->to && f->tags == b->tags) { x->to = bg; - x->tags = ZERO_TAGS; + x->tags = tagcmd_t(); } else { x->to = f->to; x->tags = f->tags; @@ -115,7 +115,8 @@ void DFA::prepare () for (uint32_t i = 0; i < s->go.nSpans; ++i) { if (!s->go.span[i].to) { s->go.span[i].to = rule2state[s->rule]; - s->go.span[i].tags = s->rule_tags; + s->go.span[i].tags.set = s->rule_tags.set; + // 'copy' tags are only for fallback paths } } } @@ -143,7 +144,7 @@ void DFA::prepare () if (default_state) { for (State *s = head; s; s = s->next) { if (s->fallback) { - const std::pair acc(rule2state[s->rule], s->rule_tags); + const std::pair acc(rule2state[s->rule], s->rule_tags); s->action.set_save(accepts.find_or_add(acc)); } } @@ -213,15 +214,15 @@ void DFA::hoist_tags() const size_t nsp = s->go.nSpans; if (nsp > 0) { Span *sp = s->go.span; - const size_t tags0 = sp[0].tags; - bool common_tags = tags0 != ZERO_TAGS; + const tagcmd_t tags0 = sp[0].tags; + bool common_tags = !tags0.empty(); for (uint32_t i = 1; common_tags && i < nsp; ++i) { common_tags &= sp[i].tags == tags0; } if (common_tags) { s->go.tags = tags0; for (uint32_t i = 0; i < nsp; ++i) { - sp[i].tags = ZERO_TAGS; + sp[i].tags = tagcmd_t(); } } } diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index c3466885..0879fcc7 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -66,6 +66,8 @@ static smart_ptr compile_rules( cutoff_dead_rules(dfa, defrule, cond); + insert_fallback_tags(dfa); + // try to minimize the number of tag variables const size_t used_tags = deduplicate_tags(dfa); diff --git a/re2c/src/ir/dfa/dead_rules.cc b/re2c/src/ir/dfa/dead_rules.cc index 6a908257..0278ef72 100644 --- a/re2c/src/ir/dfa/dead_rules.cc +++ b/re2c/src/ir/dfa/dead_rules.cc @@ -164,7 +164,7 @@ static void remove_dead_final_states(dfa_t &dfa, const bool *live) if (s->rule != Rule::NONE && !live[s->rule * nstates + i]) { s->rule = Rule::NONE; - s->rule_tags = 0; + s->rule_tags = tagcmd_t(); } } } diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 171b239b..e5524fb8 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -55,6 +55,7 @@ dfa_t::dfa_t(const nfa_t &nfa, , rules(nfa.rules) , tags(*nfa.tags) , tagpool(*nfa.tagpool) + , copy_tags(ZERO_TAGS) { clospool_t clospool; closure_t clos1, clos2; @@ -78,7 +79,7 @@ dfa_t::dfa_t(const nfa_t &nfa, f = std::find_if(clos0.begin(), e, clos_t::final); if (f != e) { s->rule = f->state->rule; - s->rule_tags = f->tagidx; + s->rule_tags.set = f->tagidx; } // for each alphabet symbol, build tagged epsilon-closure @@ -86,7 +87,7 @@ dfa_t::dfa_t(const nfa_t &nfa, // find identical closure or add the new one for (size_t c = 0; c < nchars; ++c) { reach(clos0, clos1, charset[c]); - s->tags[c] = closure(clos1, clos2, tagpool, rules, badtags); + s->tags[c].set = closure(clos1, clos2, tagpool, rules, badtags); s->arcs[c] = clospool.insert(clos2); } } diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index f175282d..ef3e63e4 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -19,17 +19,17 @@ struct nfa_t; struct dfa_state_t { size_t *arcs; - size_t *tags; + tagcmd_t *tags; size_t rule; - size_t rule_tags; + tagcmd_t rule_tags; bool fallthru; bool fallback; explicit dfa_state_t(size_t nchars) : arcs(new size_t[nchars]) - , tags(new size_t[nchars]) + , tags(new tagcmd_t[nchars]) , rule(Rule::NONE) - , rule_tags(ZERO_TAGS) + , rule_tags() , fallthru(false) , fallback(false) {} @@ -51,6 +51,7 @@ struct dfa_t std::valarray &rules; std::valarray &tags; Tagpool &tagpool; + size_t copy_tags; dfa_t(const nfa_t &nfa, const charset_t &charset, const std::string &cond); @@ -68,6 +69,7 @@ enum dfa_minimization_t void minimization(dfa_t &dfa); void fillpoints(const dfa_t &dfa, std::vector &fill); void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond); +void insert_fallback_tags(dfa_t &dfa); size_t deduplicate_tags(dfa_t &dfa); } // namespace re2c diff --git a/re2c/src/ir/dfa/fallback_tags.cc b/re2c/src/ir/dfa/fallback_tags.cc new file mode 100644 index 00000000..f5cfb77b --- /dev/null +++ b/re2c/src/ir/dfa/fallback_tags.cc @@ -0,0 +1,101 @@ +#include "src/ir/dfa/dfa.h" + +namespace re2c +{ + +static void find_overwritten_tags(const dfa_t &dfa, size_t state, bool *been, bool *owrt); + +/* note [fallback tags] + * + * We need to backup tags in fallback states: these tags may be + * updated on some non-accepting path from fallback state, and if + * the attempt to match longer rule fails, there's no way to restore + * the overwritten tags. + * + * Two different things may cause overwrites: + * - self-overlapping rules + * - overlapping rules which tags have been merged by deduplication + * + * We can prevent deduplication from merging such tags by propagating + * their liveness on all non-accepting paths from fallback state. + * + * Handling self-overlapping rules is not so easy: damage is done by + * DFA construction, which knows nothing about fallbacks. We have to + * manually create one backup tag for each potentially overwritten + * tag and add backup/restore instructions: transitions from fallback + * state must backup tag, transitions from 'yyaccept' state must + * restore it. + * + * We take special care not to create redundant backups (trace all + * non-accepting paths from the given fallback state to find which + * tags are potentially overwritten). In theory, deduplication + * could remove redundant backups, but in practice such tags cause + * artifical interference and confuse coloring heuristic. + */ + +void find_overwritten_tags(const dfa_t &dfa, size_t state, + bool *been, bool *owrt) +{ + if (been[state]) return; + been[state] = true; + + const dfa_state_t *s = dfa.states[state]; + const size_t ntags = dfa.tags.size(); + + for (size_t c = 0; c < dfa.nchars; ++c) { + const bool *tags = dfa.tagpool[s->tags[c].set]; + for (size_t t = 0; t < ntags; ++t) { + owrt[t] |= tags[t]; + } + + size_t dest = s->arcs[c]; + if (dest != dfa_t::NIL && dfa.states[dest]->fallthru) { + find_overwritten_tags(dfa, dest, been, owrt); + } + } +} + +// WARNING: this function assumes that falthrough and fallback +// attributes of DFA states have already been calculated, see +// note [fallback states] +void insert_fallback_tags(dfa_t &dfa) +{ + const size_t + nstates = dfa.states.size(), + ntags = dfa.tags.size(); + bool *been = new bool[nstates]; + + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + + if (!s->fallback) continue; + + bool *owrt = dfa.tagpool.buffer1; + std::fill(owrt, owrt + ntags, false); + std::fill(been, been + nstates, false); + find_overwritten_tags(dfa, i, been, owrt); + + const bool + *fin = dfa.tagpool[dfa.rules[s->rule].tags], + *upd = dfa.tagpool[s->rule_tags.set]; + for (size_t t = 0; t < ntags; ++t) { + owrt[t] &= fin[t] && !upd[t]; + } + const size_t copy = dfa.tagpool.insert(owrt); + + if (copy != ZERO_TAGS) { + for (size_t c = 0; c < dfa.nchars; ++c) { + size_t j = s->arcs[c]; + if (j != dfa_t::NIL && dfa.states[j]->fallthru) { + s->tags[c].copy = copy; + } + } + s->rule_tags.copy = copy; + dfa.copy_tags = dfa.tagpool.orl(dfa.copy_tags, copy); + } + } + + delete[] been; +} + +} // namespace re2c diff --git a/re2c/src/ir/dfa/minimization.cc b/re2c/src/ir/dfa/minimization.cc index 2dcffcb6..1df1c9b7 100644 --- a/re2c/src/ir/dfa/minimization.cc +++ b/re2c/src/ir/dfa/minimization.cc @@ -134,11 +134,11 @@ static void minimization_moore( size_t *next = new size_t[count]; // see note [distinguish states by tags] - std::map, size_t> init; + std::map, size_t> init; for (size_t i = 0; i < count; ++i) { dfa_state_t *s = states[i]; - std::pair key(s->rule, s->rule_tags); + std::pair key(s->rule, s->rule_tags); if (init.insert(std::make_pair(key, i)).second) { part[i] = i; @@ -190,7 +190,7 @@ static void minimization_moore( nchars * sizeof(size_t)) == 0 && memcmp(states[j]->tags, states[k]->tags, - nchars * sizeof(size_t)) == 0 + nchars * sizeof(tagcmd_t)) == 0 ) { part[j] = k; next[j] = next[k]; diff --git a/re2c/src/ir/dfa/tag_deduplication.cc b/re2c/src/ir/dfa/tag_deduplication.cc index ee9c5404..298620d1 100644 --- a/re2c/src/ir/dfa/tag_deduplication.cc +++ b/re2c/src/ir/dfa/tag_deduplication.cc @@ -70,7 +70,7 @@ static void calc_live(const dfa_t &dfa, size_t *live) if (j != dfa_t::NIL) { a->edge = i * dfa.nchars + c; a->dest = i; - a->tags = s->tags[c]; + a->tags = s->tags[c].set; a->next = rdfa[j]; rdfa[j] = a++; } @@ -80,7 +80,7 @@ static void calc_live(const dfa_t &dfa, size_t *live) const dfa_state_t *s = dfa.states[i]; if (s->rule != Rule::NONE) { const size_t need = dfa.tagpool.andlinv( - dfa.rules[s->rule].tags, s->rule_tags); + dfa.rules[s->rule].tags, s->rule_tags.set); std::fill(been, been + nstates, false); backprop(rdfa, been, i, live, need, dfa.tagpool); } @@ -93,7 +93,7 @@ static void calc_live(const dfa_t &dfa, size_t *live) dfa_state_t *s = dfa.states[i]; if (s->fallback) { const size_t need = dfa.tagpool.andlinv( - dfa.rules[s->rule].tags, s->rule_tags); + dfa.rules[s->rule].tags, s->rule_tags.set); std::fill(been, been + nstates, false); forwprop(dfa, been, i, live, need); } @@ -110,11 +110,11 @@ static void mask_dead(dfa_t &dfa, const size_t *live) for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { - s->tags[c] = dfa.tagpool.andl(s->tags[c], l[c]); + s->tags[c].set = dfa.tagpool.andl(s->tags[c].set, l[c]); } } if (s->rule != Rule::NONE) { - s->rule_tags = dfa.tagpool.andl(s->rule_tags, + s->rule_tags.set = dfa.tagpool.andl(s->rule_tags.set, dfa.rules[s->rule].tags); } } @@ -150,12 +150,12 @@ static void incompatibility_table(const dfa_t &dfa, for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { - incompatible(incompattbl, dfa.tagpool, l[c], s->tags[c]); + incompatible(incompattbl, dfa.tagpool, l[c], s->tags[c].set); } } if (s->rule != Rule::NONE) { incompatible(incompattbl, dfa.tagpool, - dfa.rules[s->rule].tags, s->rule_tags); + dfa.rules[s->rule].tags, s->rule_tags.set); } } @@ -222,10 +222,13 @@ static void patch_tags(dfa_t &dfa, const size_t *represent) for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { - s->tags[c] = dfa.tagpool.subst(s->tags[c], represent); + s->tags[c].set = dfa.tagpool.subst(s->tags[c].set, represent); + s->tags[c].copy = dfa.tagpool.subst(s->tags[c].copy, represent); } - s->rule_tags = dfa.tagpool.subst(s->rule_tags, represent); + s->rule_tags.set = dfa.tagpool.subst(s->rule_tags.set, represent); + s->rule_tags.copy = dfa.tagpool.subst(s->rule_tags.copy, represent); } + dfa.copy_tags = dfa.tagpool.subst(dfa.copy_tags, represent); const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < ntags; ++i) { diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 5e7cc854..6b40ecee 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -87,9 +87,10 @@ Skeleton::Skeleton( // in skeleton we are only interested in trailing contexts // which may be attributed to states rather than transitions - size_t tags = s->rule_tags; + // trailing context also cannot have fallback tag + size_t tags = s->rule_tags.set; for (size_t c = 0; c < nc; ++c) { - tags = dfa.tagpool.orl(tags, s->tags[c]); + tags = dfa.tagpool.orl(tags, s->tags[c].set); } nodes[i].init(dfa.tagpool[tags], s->rule, arcs); diff --git a/re2c/src/ir/tag.h b/re2c/src/ir/tag.h index bfe0ab4c..b9fbdfc8 100644 --- a/re2c/src/ir/tag.h +++ b/re2c/src/ir/tag.h @@ -3,6 +3,7 @@ #include +#include "src/ir/tagpool.h" #include "src/util/forbid_copy.h" namespace re2c @@ -35,6 +36,36 @@ struct Tag void init_var_tag(Tag &tag, size_t r, const std::string *n, size_t o); void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d); +/* must be packed */ +struct tagcmd_t +{ + size_t set; + size_t copy; + + tagcmd_t(): set(ZERO_TAGS), copy(ZERO_TAGS) {} + tagcmd_t(size_t s, size_t c): set(s), copy(c) {} + inline bool empty() const + { + return set == ZERO_TAGS && copy == ZERO_TAGS; + } +}; + +inline bool operator==(const tagcmd_t &x, const tagcmd_t &y) +{ + return x.set == y.set && x.copy == y.copy; +} + +inline bool operator!=(const tagcmd_t &x, const tagcmd_t &y) +{ + return !(x == y); +} + +inline bool operator<(const tagcmd_t &x, const tagcmd_t &y) +{ + return x.set < y.set + || (x.set == y.set && x.copy < y.copy); +} + } // namespace re2c #endif // _RE2C_IR_TAG_ diff --git a/re2c/test/tags/fallback3.i--tags.c b/re2c/test/tags/fallback3.i--tags.c new file mode 100644 index 00000000..1a7b7d5e --- /dev/null +++ b/re2c/test/tags/fallback3.i--tags.c @@ -0,0 +1,73 @@ +/* Generated by re2c */ +// This example shows the need of backuping tags in fallback +// states: if the rule is self-overlapping and the overlapping +// part has tags, these tags might be overwritten by an unsuccessful +// attempt to match longer input. + + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + long yytag0p; + long yytag0p_; + YYCTXMARKER = YYCURSOR; + if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy4; + default: goto yy2; + } +yy2: + ++YYCURSOR; +yy3: + {} +yy4: + yyaccept = 0; + yych = *(YYMARKER = ++YYCURSOR); + switch (yych) { + case 'b': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy5; + default: goto yy3; + } +yy5: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'c': goto yy7; + default: goto yy6; + } +yy6: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy3; + } else { + yytag0p = yytag0p_; + goto yy8; + } +yy7: + yyaccept = 1; + YYMARKER = ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': + yytag0p_ = yytag0p; + goto yy9; + default: goto yy8; + } +yy8: + { (YYCTXMARKER + yytag0p) } +yy9: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'b': + yytag0p = (YYCURSOR - YYCTXMARKER); + goto yy5; + default: goto yy6; + } +} + diff --git a/re2c/test/tags/fallback3.i--tags.re b/re2c/test/tags/fallback3.i--tags.re new file mode 100644 index 00000000..78da5951 --- /dev/null +++ b/re2c/test/tags/fallback3.i--tags.re @@ -0,0 +1,11 @@ +// This example shows the need of backuping tags in fallback +// states: if the rule is self-overlapping and the overlapping +// part has tags, these tags might be overwritten by an unsuccessful +// attempt to match longer input. + +/*!re2c + + ("a" @p "bc")+ { @p } + * {} + +*/ -- 2.40.0