From e289b22747f0165ef381b3f67c509a17ee55587b Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Sat, 30 Apr 2016 16:35:40 +0100 Subject: [PATCH] Count contexts after deduplication. --- re2c/src/ir/adfa/adfa.cc | 20 +++++++++++--- re2c/src/ir/adfa/adfa.h | 2 +- re2c/src/ir/adfa/prepare.cc | 8 ------ re2c/src/ir/compile.cc | 19 +++---------- re2c/src/ir/dfa/context_deduplication.cc | 34 +++++------------------- re2c/src/ir/dfa/dfa.h | 2 +- 6 files changed, 29 insertions(+), 56 deletions(-) diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 414e1d6d..8ab2539a 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -24,7 +24,7 @@ DFA::DFA , const std::string &n , const std::string &c , uint32_t l - , bool base_ctx + , size_t used_tags ) : accepts () , skeleton (skel) @@ -41,9 +41,23 @@ DFA::DFA // statistics , max_fill (0) , need_backup (false) - , need_backupctx (opts->contexts) + + // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used + , need_backupctx (used_tags > 0 || opts->contexts) , need_accept (false) - , base_ctxmarker (base_ctx) + + // Non-trailing contexts imply the existence of base context marker + // that points at the beginning of lexeme. First, it is a feature + // of re2c API. Second, it simplifies implementation (otherwise + // it would be hard to mix generic API and fixed-length contexts). + // + // The only case without base context marker is when: + // - only trailing contexts are allowed + // - they don't overlap (one marker is enough for all of them) + // - with generic API fixed-length contexts are forbidden + // Note that in this case, if generic API is used, fixed-length + // contexts are forbidden (which may cause additional overlaps). + , base_ctxmarker (used_tags > 1 || opts->contexts) { const size_t nstates = dfa.states.size(); const size_t nchars = dfa.nchars; diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 4816cb18..eb60ba9e 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -87,7 +87,7 @@ public: , const std::string &n , const std::string &c , uint32_t l - , bool base_ctx + , size_t used_tags ); ~DFA (); void reorder(); diff --git a/re2c/src/ir/adfa/prepare.cc b/re2c/src/ir/adfa/prepare.cc index 050fd2ab..ee9dff21 100644 --- a/re2c/src/ir/adfa/prepare.cc +++ b/re2c/src/ir/adfa/prepare.cc @@ -194,14 +194,6 @@ void DFA::calc_stats () // determine if 'YYMARKER' or 'YYBACKUP'/'YYRESTORE' pair is used need_backup = accepts.size () > 0; - // determine if 'YYCTXMARKER' or 'YYBACKUPCTX'/'YYRESTORECTX' pair is used - for (State * s = head; s; s = s->next) { - if (!s->ctxs.empty()) { - need_backupctx = true; - break; - } - } - // determine if 'yyaccept' variable is used need_accept = accepts.size () > 1; } diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index effc28e6..0a4e4bc0 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -72,25 +72,12 @@ static smart_ptr compile_rules( std::vector fallback; fallback_states(dfa, fallback); - // Non-trailing contexts imply the existence of base context marker - // that points at the beginning of lexeme. First, it is a feature - // of re2c API. Second, it simplifies implementation (otherwise - // it would be hard to mix generic API and fixed-length contexts). - // - // The only case without base context marker is when: - // - only trailing contexts are allowed - // - they don't overlap (one marker is enough for all of them) - // - with generic API fixed-length contexts are forbidden - // Note that in this case, if generic API is used, fixed-length - // contexts are forbidden (which may cause additional overlaps). - const bool multiple_ctxmarkers = deduplicate_contexts(dfa, fallback); - const bool base_ctxmarker - = multiple_ctxmarkers - || opts->contexts; + // try to minimize the number of tag variables + const size_t used_tags = deduplicate_contexts(dfa, fallback); // ADFA stands for 'DFA with actions' DFA *adfa = new DFA(dfa, fill, fallback, skeleton, cs, - name, cond, line, base_ctxmarker); + name, cond, line, used_tags); // see note [reordering DFA states] adfa->reorder(); diff --git a/re2c/src/ir/dfa/context_deduplication.cc b/re2c/src/ir/dfa/context_deduplication.cc index db9f1a70..96098857 100644 --- a/re2c/src/ir/dfa/context_deduplication.cc +++ b/re2c/src/ir/dfa/context_deduplication.cc @@ -44,13 +44,12 @@ static void calc_live( } } -bool deduplicate_contexts( - const dfa_t &dfa, +size_t deduplicate_contexts(dfa_t &dfa, const std::vector &fallback) { const size_t nctxs = dfa.contexts.size(); if (nctxs < 2) { - return false; + return nctxs; } std::set fbctxs; @@ -124,32 +123,13 @@ bool deduplicate_contexts( head[0] = c; } } - const bool multiple_contexts = head[0] != END; - -/* - std::vector c2c; - std::vector > part; + size_t ncontexts = 0; for (size_t i = 0; i < nctxs; ++i) { - size_t j; - for (j = 0; j < partn.size(); ++j) { - std::vector &p = part[j]; - size_t k; - for (k = 0; k < p.size(); ++k) { - if (xxx[i * nctxs + p[k]]) { - break; - } - } - if (k == p.size()) { - break; - } + if (part[i] == i) { + ++ncontexts; } - if (j == part.size()) { - part.push_back(std::vector()); - } - part[j].push_back(i); - c2c[i] = part[j][0]; } -*/ + for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; std::set ctxs; @@ -162,7 +142,7 @@ bool deduplicate_contexts( dfa.contexts[i].uniqname = dfa.contexts[part[i]].uniqname; } - return multiple_contexts; + return ncontexts; } diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 494b43af..2f1e720c 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -63,7 +63,7 @@ void check_context_selfoverlap(ord_hash_set_t &kernels, void minimization(dfa_t &dfa); void fillpoints(const dfa_t &dfa, std::vector &fill); void fallback_states(const dfa_t &dfa, std::vector &fallback); -bool deduplicate_contexts(const dfa_t &dfa, const std::vector &fallback); +size_t deduplicate_contexts(dfa_t &dfa, const std::vector &fallback); } // namespace re2c -- 2.40.0