From 14db44c0de3978090483dddbd3e74c6b771b7a1d Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sun, 1 May 2016 09:18:07 +0100 Subject: [PATCH] More precise liveness analyses for context deduplication. Liveness analyses should exclude contexts that are updated before they are used past certain point in the program. --- re2c/Makefile.am | 1 + re2c/src/codegen/emit_action.cc | 4 +- re2c/src/codegen/input_api.cc | 8 +- re2c/src/codegen/input_api.h | 2 +- re2c/src/ir/adfa/adfa.cc | 7 +- re2c/src/ir/adfa/adfa.h | 6 +- re2c/src/ir/dfa/context_deduplication.cc | 286 +++++++++++++------ re2c/src/ir/dfa/determinization.cc | 11 +- re2c/src/ir/dfa/dfa.h | 10 +- re2c/src/ir/dfa/minimization.cc | 6 +- re2c/src/ir/nfa/nfa.cc | 9 +- re2c/src/ir/rule.cc | 12 + re2c/src/ir/rule.h | 9 +- re2c/src/ir/skeleton/path.h | 3 +- re2c/src/ir/skeleton/skeleton.cc | 18 +- re2c/src/ir/skeleton/skeleton.h | 4 +- re2c/src/ir/tagpool.h | 60 ++++ re2c/test/contexts/dedup0.i--input(custom).c | 13 +- re2c/test/contexts/dedup0.i.c | 13 +- re2c/test/contexts/dedup2.i--input(custom).c | 13 +- re2c/test/contexts/dedup2.i.c | 13 +- 21 files changed, 343 insertions(+), 165 deletions(-) create mode 100644 re2c/src/ir/tagpool.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index a5f472c5..621f6f0e 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -38,6 +38,7 @@ SRC_HDR = \ src/ir/compile.h \ src/ir/ctx.h \ src/ir/rule.h \ + src/ir/tagpool.h \ src/ir/skeleton/path.h \ src/ir/skeleton/skeleton.h \ src/globals.h \ diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 632ee8f3..8c32608e 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -54,9 +54,9 @@ void emit_action(OutputFile &o, uint32_t ind, bool &readCh, emit_rule(o, ind, dfa, s->action.info.rule); break; } - if (!s->ctxs.empty()) { + if (s->tags != 0) { if (dfa.base_ctxmarker) { - o.wstring(opts->input_api.stmt_dist(ind, s->ctxs, dfa.contexts)); + o.wstring(opts->input_api.stmt_dist(ind, dfa.tagpool[s->tags], dfa.contexts)); } else { o.wstring(opts->input_api.stmt_backupctx(ind)); } diff --git a/re2c/src/codegen/input_api.cc b/re2c/src/codegen/input_api.cc index 4f0f010a..659c4d43 100644 --- a/re2c/src/codegen/input_api.cc +++ b/re2c/src/codegen/input_api.cc @@ -107,12 +107,14 @@ std::string InputAPI::expr_dist () const return s; } -std::string InputAPI::stmt_dist (uint32_t ind, const std::set &ctxs, +std::string InputAPI::stmt_dist (uint32_t ind, const bool *tags, const std::vector &contexts) const { std::string s = indent(ind); - for (std::set::const_iterator i = ctxs.begin(); i != ctxs.end(); ++i) { - s += contexts[*i].expr() + " = "; + for (size_t i = 0; i < contexts.size(); ++i) { + if (tags[i]) { + s += contexts[i].expr() + " = "; + } } return s + expr_dist() + ";\n"; } diff --git a/re2c/src/codegen/input_api.h b/re2c/src/codegen/input_api.h index b575e57c..4ef77a0f 100644 --- a/re2c/src/codegen/input_api.h +++ b/re2c/src/codegen/input_api.h @@ -33,7 +33,7 @@ public: std::string stmt_backup (uint32_t ind) const; std::string stmt_backupctx (uint32_t ind) const; std::string expr_dist () const; - std::string stmt_dist (uint32_t ind, const std::set &ctxs, + std::string stmt_dist (uint32_t ind, const bool *tags, const std::vector &contexts) const; std::string expr_ctx (const std::string &ctx) const; std::string expr_ctx_fix (const CtxFix &ctx, const std::vector &ctxvars) const; diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 8ab2539a..0678ce24 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -37,6 +37,7 @@ DFA::DFA , head(NULL) , rules(dfa.rules) , contexts(dfa.contexts) + , tagpool(dfa.tagpool) // statistics , max_fill (0) @@ -78,7 +79,7 @@ DFA::DFA *p = s; p = &s->next; - s->ctxs = t->ctxs; + s->tags = t->tags; s->rule = t->rule; s->fill = fill[i]; s->go.span = allocate(nchars); @@ -112,9 +113,9 @@ DFA::~DFA() } delete skeleton; - - delete &contexts; delete &rules; + delete &contexts; + delete &tagpool; } /* note [reordering DFA states] diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index b19b1fac..693dcb24 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -12,6 +12,7 @@ #include "src/ir/adfa/action.h" #include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" +#include "src/ir/tagpool.h" #include "src/util/forbid_copy.h" namespace re2c @@ -30,7 +31,7 @@ struct State bool fallback; size_t rule; - std::set ctxs; + size_t tags; bool isBase; Go go; Action action; @@ -41,7 +42,7 @@ struct State , fill (0) , fallback (false) , rule (Rule::NONE) - , ctxs () + , tags (0) , isBase (false) , go () , action () @@ -67,6 +68,7 @@ struct DFA State * head; std::valarray &rules; std::vector &contexts; + Tagpool &tagpool; size_t max_fill; bool need_backup; bool need_backupctx; diff --git a/re2c/src/ir/dfa/context_deduplication.cc b/re2c/src/ir/dfa/context_deduplication.cc index 99a4f225..c74ece58 100644 --- a/re2c/src/ir/dfa/context_deduplication.cc +++ b/re2c/src/ir/dfa/context_deduplication.cc @@ -1,156 +1,252 @@ #include -#include -#include #include "src/ir/dfa/dfa.h" namespace re2c { -static void calc_live( - const dfa_t &dfa, - const std::set &fallback, - std::vector &visited, - std::vector > &live, - size_t i) +static size_t mask_dead_tags(Tagpool &tagpool, + size_t oldidx, const bool *live) { - if (!visited[i]) { - visited[i] = true; - - dfa_state_t *s = dfa.states[i]; - - bool fallthru = false; - for (size_t c = 0; c < dfa.nchars; ++c) { - const size_t j = s->arcs[c]; - if (j != dfa_t::NIL) { - calc_live(dfa, fallback, visited, live, j); - live[i].insert(live[j].begin(), live[j].end()); - } else { - fallthru = true; - } - } + const bool *oldtags = tagpool[oldidx]; + bool *newtags = new bool[tagpool.ntags]; + for (size_t i = 0; i < tagpool.ntags; ++i) { + newtags[i] = oldtags[i] & live[i]; + } + const size_t newidx = tagpool.insert(newtags); + delete[] newtags; + return newidx; +} - if (s->rule != Rule::NONE) { - const Rule &rule = dfa.rules[s->rule]; - for (size_t j = rule.ltag; j < rule.htag; ++j) { - live[i].insert(j); - } - if (rule.trail.type == Trail::VAR) { - live[i].insert(rule.trail.pld.var); - } - } else if (fallthru) { - // transition to default state: all fallback rules - // are potentialy reachable, their contexts are alive - live[i].insert(fallback.begin(), fallback.end()); +static bool dangling_arcs(const size_t *arcs, size_t narcs) +{ + for (size_t i = 0; i < narcs; ++i) { + if (arcs[i] == dfa_t::NIL) { + return true; } } + return false; } -size_t deduplicate_contexts(dfa_t &dfa, - const std::vector &fallback) +/* note [liveness analyses on tags] + * + * Tag T is alive in state S if either: + * + * - There is a transition from S to default state, S does not set T, + * S is final and T belongs to tag set associated with rule in S. + * + * - There is a transition from S to default state, S does not set T + * and T belongs to any tag set associated with fallback rules. + * + * - There is a transition from S to some state S' (maybe equal to S), + * S does not set T and T is alive in S'. + */ +static void calc_live(const dfa_t &dfa, + const bool *fallback, + bool *visited, + bool *live, + size_t i) { - const size_t nctxs = dfa.contexts.size(); - if (nctxs < 2) { - return nctxs; + if (visited[i]) { + return; } - std::set fbctxs; - for (size_t i = 0; i < fallback.size(); ++i) { - const Rule &rule = dfa.rules[dfa.states[fallback[i]]->rule]; - for (size_t j = rule.ltag; j < rule.htag; ++j) { - fbctxs.insert(j); + visited[i] = true; + dfa_state_t *s = dfa.states[i]; + const size_t ntags = dfa.contexts.size(); + + // add tags before recursing to child states, + // so that tags propagate into loopbacks to this state + if (dangling_arcs(s->arcs, dfa.nchars)) { + if (s->rule != Rule::NONE) { + // final state, only rule tags are alive + add_tags_with_mask(&live[i * ntags], + dfa.rules[s->rule].tags, + dfa.tagpool[s->tags], + ntags); + } else { + // transition to default state and dispatch on + // 'yyaccept': all fallback rules are potentially + // reachable, their tags are alive + // no mask (no rule implies no rule tags) + add_tags(&live[i * ntags], fallback, ntags); } - if (rule.trail.type == Trail::VAR) { - fbctxs.insert(rule.trail.pld.var); + } + + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL) { + calc_live(dfa, fallback, visited, live, j); + add_tags_with_mask(&live[i * ntags], + &live[j * ntags], + dfa.tagpool[s->tags], + ntags); } } +} +static void mask_dead(dfa_t &dfa, + const bool *livetags) +{ const size_t nstates = dfa.states.size(); - - std::vector visited(nstates); - std::vector > live(nstates); - calc_live(dfa, fbctxs, visited, live, 0); - - std::vector buffer(nctxs); - std::vector xxx(nctxs * nctxs); + const size_t ntags = dfa.contexts.size(); for (size_t i = 0; i < nstates; ++i) { - const std::set - &ctxs = dfa.states[i]->ctxs, - &liv = live[i]; - std::vector::iterator - diff = buffer.begin(), - diff_end = std::set_difference(liv.begin(), liv.end(), - ctxs.begin(), ctxs.end(), diff); - for (; diff != diff_end; ++diff) { - for (std::set::const_iterator j = ctxs.begin(); j != ctxs.end(); ++j) { - xxx[*diff * nctxs + *j] = xxx[*j * nctxs + *diff] = true; + dfa_state_t *s = dfa.states[i]; + mask_dead_tags(dfa.tagpool, s->tags, &livetags[i * ntags]); + } +} + +// tags that are updated here are pairwise incompatible +// with all tags that are alive, but not updated here +static void incompatible(bool *tbl, + const bool *live, + const bool *tags, + size_t ntags) +{ + for (size_t i = 0; i < ntags; ++i) { + if (live[i] && !tags[i]) { + for (size_t j = 0; j < ntags; ++j) { + if (tags[j]) { + tbl[i * ntags + j] = tbl[j * ntags + i] = true; + } } } } +} - // We have a binary relation on the set of all contexts - // and must construct set decomposition into subsets such that - // all contexts in the same subset are equivalent. - // - // This problem is isomorphic to partitioning graph into cliques - // (aka finding the 'clique cover' of a graph). - // - // Finding minimal clique cover in arbitrary graph is NP-complete. - // We build just some cover (not necessarily minimal). - // The algorithm takes quadratic (in the number of contexts) time. +static void incompatibility_table(const dfa_t &dfa, + const bool *livetags, + bool *incompattbl) +{ + const size_t nstates = dfa.states.size(); + const size_t ntags = dfa.contexts.size(); + for (size_t i = 0; i < nstates; ++i) { + const dfa_state_t *s = dfa.states[i]; + incompatible(incompattbl, &livetags[i * ntags], + dfa.tagpool[s->tags], ntags); + } +} +/* We have a binary relation on the set of all tags + * and must construct set decomposition into subsets such that + * all contexts in the same subset are equivalent. + * + * This problem is isomorphic to partitioning graph into cliques + * (aka finding the 'clique cover' of a graph). + * + * Finding minimal clique cover in arbitrary graph is NP-complete. + * We build just some cover (not necessarily minimal). + * The algorithm takes quadratic (in the number of tags) time. + * static void equivalence_classes(const std::vector &incompattbl, + */ +static size_t equivalence_classes(const bool *incompattbl, + size_t ntags, std::vector &represent) +{ static const size_t END = std::numeric_limits::max(); std::vector - part(nctxs, 0), // partition: mapping of context to its representative - head(nctxs, END), // list of representatives - next(nctxs, END); // list of contexts mapped to the same representative - // skip the 1st context, it maps to itself - for (size_t c = 1; c < nctxs; ++c) { + head(ntags, END), // list of representatives + next(ntags, END); // list of tags mapped to the same representative + + // skip the 1st tag, it maps to itself + for (size_t c = 1; c < ntags; ++c) { size_t h; for (h = 0; h != END; h = head[h]) { size_t n; for (n = h; n != END; n = next[n]) { - if (xxx[c * nctxs + n]) { + if (incompattbl[c * ntags + n]) { break; } } if (n == END) { - part[c] = h; + represent[c] = h; next[c] = next[h]; next[h] = c; break; } } if (h == END) { - part[c] = c; + represent[c] = c; head[c] = head[0]; head[0] = c; } } - size_t ncontexts = 0; - for (size_t i = 0; i < nctxs; ++i) { - if (part[i] == i) { - ++ncontexts; + + size_t nreps = 0; + for (size_t i = 0; i < ntags; ++i) { + if (represent[i] == i) { + ++nreps; } } + return nreps; +} - for (size_t i = 0; i < nstates; ++i) { - dfa_state_t *s = dfa.states[i]; - std::set ctxs; - for (std::set::const_iterator j = s->ctxs.begin(); j != s->ctxs.end(); ++j) { - ctxs.insert(part[*j]); +static size_t patch_tagset(Tagpool &tagpool, size_t oldidx, + const std::vector &represent) +{ + const bool *oldtags = tagpool[oldidx]; + bool *newtags = new bool[tagpool.ntags](); + for (size_t i = 0; i < tagpool.ntags; ++i) { + if (oldtags[i]) { + newtags[represent[i]] = true; } - s->ctxs.swap(ctxs); } - for (size_t i = 0; i < nctxs; ++i) { - dfa.contexts[i].uniqname = dfa.contexts[part[i]].uniqname; + const size_t newidx = tagpool.insert(newtags); + delete[] newtags; + return newidx; +} + +static void patch_tags(dfa_t &dfa, const std::vector &represent) +{ + const size_t nstates = dfa.states.size(); + for (size_t i = 0; i < nstates; ++i) { + dfa_state_t *s = dfa.states[i]; + s->tags = patch_tagset(dfa.tagpool, s->tags, represent); } - return ncontexts; + const size_t ntags = dfa.contexts.size(); + for (size_t i = 0; i < ntags; ++i) { + dfa.contexts[i].uniqname = dfa.contexts[represent[i]].uniqname; + } } +size_t deduplicate_contexts(dfa_t &dfa, + const std::vector &fallback) +{ + const size_t ntags = dfa.contexts.size(); + if (ntags == 0) { + return 0; + } + + bool *fbctxs = new bool[ntags](); + for (size_t i = 0; i < fallback.size(); ++i) { + const size_t r = dfa.states[fallback[i]]->rule; + add_tags(fbctxs, dfa.rules[r].tags, ntags); + } + + const size_t nstates = dfa.states.size(); + bool *visited = new bool[nstates](); + bool *live = new bool[nstates * ntags](); + calc_live(dfa, fbctxs, visited, live, 0); + + mask_dead(dfa, live); + + bool *incompattbl = new bool[ntags * ntags](); + incompatibility_table(dfa, live, incompattbl); + std::vector represent(ntags, 0); + const size_t nreps = equivalence_classes(incompattbl, ntags, represent); + if (nreps < ntags) { + patch_tags(dfa, represent); + } + + delete[] fbctxs; + delete[] visited; + delete[] live; + delete[] incompattbl; + + return nreps; +} } // namespace re2c diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 1b80cd2f..a8c242fe 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -96,21 +96,25 @@ dfa_t::dfa_t( , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) , contexts(nfa.contexts) + , tagpool(*new Tagpool(contexts.size())) { + const size_t ntags = contexts.size(); const size_t nrules = rules.size(); ord_hash_set_t kernels; nfa_state_t **const buffer = new nfa_state_t*[nfa.size]; std::vector > arcs(nchars); bool *fin = new bool[nrules]; + bool *tags = new bool[ntags]; find_state(buffer, closure(buffer, nfa.root), kernels); for (size_t i = 0; i < kernels.size(); ++i) { - dfa_state_t *s = new dfa_state_t; + dfa_state_t *s = new dfa_state_t(nchars); states.push_back(s); memset(fin, 0, nrules * sizeof(bool)); + memset(tags, 0, ntags * sizeof(bool)); nfa_state_t **kernel; const size_t kernel_size = kernels.deref(i, kernel); @@ -134,7 +138,7 @@ dfa_t::dfa_t( break; } case nfa_state_t::CTX: - s->ctxs.insert(n->value.ctx.info); + tags[n->value.ctx.info] = true; break; case nfa_state_t::FIN: fin[n->value.fin.rule] = true; @@ -144,7 +148,6 @@ dfa_t::dfa_t( } } - s->arcs = new size_t[nchars]; for(size_t c = 0; c < nchars; ++c) { nfa_state_t **end = buffer; @@ -154,6 +157,7 @@ dfa_t::dfa_t( } s->arcs[c] = find_state(buffer, end, kernels); } + s->tags = tagpool.insert(tags); // choose the first rule (the one with smallest rank) size_t r; @@ -177,6 +181,7 @@ dfa_t::dfa_t( } delete[] buffer; delete[] fin; + delete[] tags; check_context_selfoverlap(kernels, contexts, line, cond); } diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 2f1e720c..1bea33fe 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -8,6 +8,7 @@ #include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" +#include "src/ir/tagpool.h" #include "src/util/forbid_copy.h" #include "src/util/ord_hash_set.h" @@ -20,12 +21,12 @@ struct dfa_state_t { size_t *arcs; size_t rule; - std::set ctxs; + size_t tags; - dfa_state_t() - : arcs(NULL) + explicit dfa_state_t(size_t nchars) + : arcs(new size_t[nchars]) , rule(Rule::NONE) - , ctxs() + , tags(0) {} ~dfa_state_t() { @@ -43,6 +44,7 @@ struct dfa_t const size_t nchars; std::valarray &rules; std::vector &contexts; + Tagpool &tagpool; dfa_t(const nfa_t &nfa, const charset_t &charset, uint32_t line, const std::string &cond); diff --git a/re2c/src/ir/dfa/minimization.cc b/re2c/src/ir/dfa/minimization.cc index d00e13f5..33ada5bb 100644 --- a/re2c/src/ir/dfa/minimization.cc +++ b/re2c/src/ir/dfa/minimization.cc @@ -45,7 +45,7 @@ static void minimization_table( for (size_t j = 0; j < i; ++j) { dfa_state_t *s2 = states[j]; - tbl[i][j] = s1->ctxs != s2->ctxs + tbl[i][j] = s1->tags != s2->tags || s1->rule != s2->rule; } } @@ -133,11 +133,11 @@ static void minimization_moore( size_t *next = new size_t[count]; // see note [distinguish states by contexts] - std::map >, size_t> init; + std::map, size_t> init; for (size_t i = 0; i < count; ++i) { dfa_state_t *s = states[i]; - std::pair > key(s->rule, s->ctxs); + std::pair key(s->rule, s->tags); if (init.insert(std::make_pair(key, i)).second) { part[i] = i; diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index 33f802e8..acf450d2 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -220,8 +220,13 @@ nfa_t::nfa_t(const std::vector &rs) , rules(*new std::valarray(rs.size())) , contexts(*new std::vector) , root(compile_rules(rs, *this)) -{} - +{ + const size_t nrules = rules.size(); + const size_t ntags = contexts.size(); + for (size_t i = 0; i < nrules; ++i) { + init_tags(rules[i], ntags); + } +} nfa_t::~nfa_t() { diff --git a/re2c/src/ir/rule.cc b/re2c/src/ir/rule.cc index 6b55421d..6d962fe8 100644 --- a/re2c/src/ir/rule.cc +++ b/re2c/src/ir/rule.cc @@ -7,4 +7,16 @@ namespace re2c const size_t Rule::NONE = std::numeric_limits::max(); +void init_tags(Rule &rule, size_t ntags) +{ + bool *tags = new bool[ntags](); + for (size_t t = rule.ltag; t < rule.htag; ++t) { + tags[t] = true; + } + if (rule.trail.type == Trail::VAR) { + tags[rule.trail.pld.var] = true; + } + rule.tags = tags; +} + } // namespace re2c diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index f5500533..9b6a461a 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -40,7 +40,7 @@ struct Rule std::vector ctxfix; Trail trail; bool nullable; - + bool *tags; std::set shadow; bool reachable; @@ -51,13 +51,20 @@ struct Rule , ctxfix() , trail() , nullable(false) + , tags(NULL) , shadow() , reachable(false) {} + ~Rule() + { + delete[] tags; + } FORBID_COPY(Rule); }; +void init_tags(Rule &rule, size_t ntags); + } // namespace re2c #endif // _RE2C_IR_RULE_ diff --git a/re2c/src/ir/skeleton/path.h b/re2c/src/ir/skeleton/path.h index 4ff00ce6..89185afb 100644 --- a/re2c/src/ir/skeleton/path.h +++ b/re2c/src/ir/skeleton/path.h @@ -59,8 +59,7 @@ public: case Trail::VAR: { const size_t ctx = trail.pld.var; for (; tail != head; ++tail) { - std::set &ctxs = skel.nodes[*tail].ctxs; - if (ctxs.find(ctx) != ctxs.end()) { + if (skel.nodes[*tail].tags[ctx]) { return static_cast(head - tail) - 1; } } diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 42cdaf4a..8b00b3b4 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -7,20 +7,18 @@ namespace re2c { -Node::Node() : - arcs(), - arcsets(), - rule(Rule::NONE), - ctxs() +Node::Node() + : arcs() + , arcsets() + , rule(Rule::NONE) + , tags(NULL) {} -void Node::init( - const std::set &cs, - size_t r, +void Node::init(const bool *ts, size_t r, const std::vector > &a) { rule = r; - ctxs = cs; + tags = ts; uint32_t lb = 0; std::vector >::const_iterator @@ -86,7 +84,7 @@ Skeleton::Skeleton( if (arcs.size() == 1 && arcs[0].first == nodes_count - 1) { arcs.clear(); } - nodes[i].init(s->ctxs, s->rule, arcs); + nodes[i].init(dfa.tagpool[s->tags], s->rule, arcs); } // initialize size of key diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index 456d646a..7f7561d4 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -36,10 +36,10 @@ struct Node arcs_t arcs; arcsets_t arcsets; size_t rule; - std::set ctxs; + const bool *tags; Node(); - void init(const std::set &cs, size_t r, + void init(const bool *ts, size_t r, const std::vector > &arcs); bool end() const; diff --git a/re2c/src/ir/tagpool.h b/re2c/src/ir/tagpool.h new file mode 100644 index 00000000..c7e2aa7e --- /dev/null +++ b/re2c/src/ir/tagpool.h @@ -0,0 +1,60 @@ +#ifndef _RE2C_IR_TAGPOOL_ +#define _RE2C_IR_TAGPOOL_ + +#include "src/util/ord_hash_set.h" + +namespace re2c +{ + +struct Tagpool +{ + const size_t ntags; + +private: + ord_hash_set_t pool; + +public: + explicit inline Tagpool(size_t n); + inline size_t insert(const bool *tags); + inline const bool *operator[](size_t idx); +}; + +Tagpool::Tagpool(size_t n) + : ntags(n) + , pool() +{ + const bool *zerotags = new bool[ntags](); + insert(zerotags); + delete[] zerotags; +} + +size_t Tagpool::insert(const bool *tags) +{ + return pool.insert(tags, ntags * sizeof(bool)); +} + +const bool *Tagpool::operator[](size_t idx) +{ + const bool *tags; + pool.deref(idx, tags); + return tags; +} + +inline void add_tags(bool *oldtags, const bool *newtags, size_t ntags) +{ + for (size_t i = 0; i < ntags; ++i) { + oldtags[i] |= newtags[i]; + } +} + +inline void add_tags_with_mask(bool *oldtags, const bool *newtags, + const bool *mask, size_t ntags) +{ + for (size_t i = 0; i < ntags; ++i) { + oldtags[i] |= newtags[i] & ~mask[i]; + } +} + +} // namespace re2c + +#endif // _RE2C_IR_TAGPOOL_ diff --git a/re2c/test/contexts/dedup0.i--input(custom).c b/re2c/test/contexts/dedup0.i--input(custom).c index e599b115..b3492e83 100644 --- a/re2c/test/contexts/dedup0.i--input(custom).c +++ b/re2c/test/contexts/dedup0.i--input(custom).c @@ -2,9 +2,6 @@ { YYCTYPE yych; - long yyctx0; - long yyctx2; - YYBACKUPCTX (); if (YYLESSTHAN (3)) YYFILL(3); yych = YYPEEK (); switch (yych) { @@ -19,7 +16,7 @@ yy4: YYSKIP (); YYBACKUP (); yych = YYPEEK (); - yyctx0 = YYDIST(); + YYBACKUPCTX (); switch (yych) { case 'a': goto yy5; case 'b': goto yy7; @@ -29,7 +26,7 @@ yy4: yy5: YYSKIP (); yych = YYPEEK (); - yyctx2 = YYDIST(); + YYBACKUPCTX (); switch (yych) { case 'z': goto yy13; default: goto yy6; @@ -46,7 +43,7 @@ yy7: default: goto yy9; } yy9: - YYRESTORECTX (yyctx0); + YYRESTORECTX (); {} yy10: YYSKIP (); @@ -57,7 +54,7 @@ yy10: default: goto yy12; } yy12: - YYRESTORECTX (yyctx0); + YYRESTORECTX (); {} yy13: YYSKIP (); @@ -68,7 +65,7 @@ yy13: default: goto yy15; } yy15: - YYRESTORECTX (yyctx2); + YYRESTORECTX (); {} } diff --git a/re2c/test/contexts/dedup0.i.c b/re2c/test/contexts/dedup0.i.c index d6cdb5c2..6d015264 100644 --- a/re2c/test/contexts/dedup0.i.c +++ b/re2c/test/contexts/dedup0.i.c @@ -2,9 +2,6 @@ { YYCTYPE yych; - long yyctx0; - long yyctx2; - YYCTXMARKER = YYCURSOR; if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); yych = *YYCURSOR; switch (yych) { @@ -17,7 +14,7 @@ yy3: {} yy4: yych = *(YYMARKER = ++YYCURSOR); - yyctx0 = (YYCURSOR - YYCTXMARKER); + YYCTXMARKER = YYCURSOR; switch (yych) { case 'a': goto yy5; case 'b': goto yy7; @@ -26,7 +23,7 @@ yy4: } yy5: yych = *++YYCURSOR; - yyctx2 = (YYCURSOR - YYCTXMARKER); + YYCTXMARKER = YYCURSOR; switch (yych) { case 'z': goto yy13; default: goto yy6; @@ -43,7 +40,7 @@ yy7: default: goto yy9; } yy9: - YYCURSOR = YYCTXMARKER + yyctx0; + YYCURSOR = YYCTXMARKER; {} yy10: ++YYCURSOR; @@ -54,7 +51,7 @@ yy10: default: goto yy12; } yy12: - YYCURSOR = YYCTXMARKER + yyctx0; + YYCURSOR = YYCTXMARKER; {} yy13: ++YYCURSOR; @@ -65,7 +62,7 @@ yy13: default: goto yy15; } yy15: - YYCURSOR = YYCTXMARKER + yyctx2; + YYCURSOR = YYCTXMARKER; {} } diff --git a/re2c/test/contexts/dedup2.i--input(custom).c b/re2c/test/contexts/dedup2.i--input(custom).c index 6cdc0cb7..1cffd83d 100644 --- a/re2c/test/contexts/dedup2.i--input(custom).c +++ b/re2c/test/contexts/dedup2.i--input(custom).c @@ -2,9 +2,6 @@ { YYCTYPE yych; - long yyctx0; - long yyctx2; - YYBACKUPCTX (); if (YYLESSTHAN (3)) YYFILL(3); yych = YYPEEK (); switch (yych) { @@ -13,19 +10,19 @@ } yy2: YYSKIP (); - yyctx0 = YYDIST(); + YYBACKUPCTX (); yych = YYPEEK (); goto yy6; - YYRESTORECTX (yyctx0); + YYRESTORECTX (); {} yy4: YYSKIP (); - yyctx0 = YYDIST(); + YYBACKUPCTX (); switch ((yych = YYPEEK ())) { case 'a': goto yy10; default: goto yy8; } - YYRESTORECTX (yyctx0); + YYRESTORECTX (); {} yy6: YYSKIP (); @@ -39,7 +36,7 @@ yy8: goto yy8; yy10: YYSKIP (); - yyctx2 = YYDIST(); + YYBACKUPCTX (); yych = YYPEEK (); goto yy8; } diff --git a/re2c/test/contexts/dedup2.i.c b/re2c/test/contexts/dedup2.i.c index 09518e22..b594ceab 100644 --- a/re2c/test/contexts/dedup2.i.c +++ b/re2c/test/contexts/dedup2.i.c @@ -2,9 +2,6 @@ { YYCTYPE yych; - long yyctx0; - long yyctx2; - YYCTXMARKER = YYCURSOR; if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3); yych = *YYCURSOR; switch (yych) { @@ -13,19 +10,19 @@ } yy2: ++YYCURSOR; - yyctx0 = (YYCURSOR - YYCTXMARKER); + YYCTXMARKER = YYCURSOR; yych = *YYCURSOR; goto yy6; - YYCURSOR = YYCTXMARKER + yyctx0; + YYCURSOR = YYCTXMARKER; {} yy4: ++YYCURSOR; - yyctx0 = (YYCURSOR - YYCTXMARKER); + YYCTXMARKER = YYCURSOR; switch ((yych = *YYCURSOR)) { case 'a': goto yy10; default: goto yy8; } - YYCURSOR = YYCTXMARKER + yyctx0; + YYCURSOR = YYCTXMARKER; {} yy6: ++YYCURSOR; @@ -39,7 +36,7 @@ yy8: goto yy8; yy10: ++YYCURSOR; - yyctx2 = (YYCURSOR - YYCTXMARKER); + YYCTXMARKER = YYCURSOR; yych = *YYCURSOR; goto yy8; } -- 2.40.0