From 42b60fff68bd76f7e808b19c5ebbedc0050cd049 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 15 Mar 2016 08:08:45 +0000 Subject: [PATCH] Moved calculation of fallback states to earlier stage of DFA construction. We will need this later, when adding multiple context markers (liveness analyses during context marker deduplication). --- re2c/Makefile.am | 1 + re2c/src/ir/adfa/adfa.cc | 5 +++++ re2c/src/ir/adfa/adfa.h | 3 +++ re2c/src/ir/adfa/prepare.cc | 23 +++++++---------------- re2c/src/ir/compile.cc | 6 +++++- re2c/src/ir/dfa/dfa.h | 1 + re2c/src/ir/dfa/fallback.cc | 31 +++++++++++++++++++++++++++++++ 7 files changed, 53 insertions(+), 17 deletions(-) create mode 100644 re2c/src/ir/dfa/fallback.cc diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 1e73e3e3..0acdfe1c 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -83,6 +83,7 @@ SRC = \ src/ir/adfa/adfa.cc \ src/ir/adfa/prepare.cc \ src/ir/dfa/determinization.cc \ + src/ir/dfa/fallback.cc \ src/ir/dfa/fillpoints.cc \ src/ir/dfa/minimization.cc \ src/ir/regexp/encoding/enc.cc \ diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index cb41d035..83aa560a 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -16,6 +16,7 @@ namespace re2c DFA::DFA ( const dfa_t &dfa , const std::vector<size_t> &fill + , const std::vector<size_t> &fallback , Skeleton *skel , const charset_t &charset , const std::string &n @@ -73,6 +74,10 @@ DFA::DFA } *p = NULL; + for (size_t i = 0; i < fallback.size(); ++i) { + i2s[fallback[i]]->fallback = true; + } + delete[] i2s; } diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index e4b7e4ab..b85b4b2c 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -26,6 +26,7 @@ struct State const RuleInfo * rule; State * next; size_t fill; + bool fallback; bool isPreCtxt; bool isBase; @@ -37,6 +38,7 @@ struct State , rule (NULL) , next (0) , fill (0) + , fallback (false) , isPreCtxt (false) , isBase (false) , go () @@ -74,6 +76,7 @@
public: public: DFA ( const dfa_t &dfa , const std::vector<size_t> &fill + , const std::vector<size_t> &fallback , Skeleton *skel , const charset_t &charset , const std::string &n diff --git a/re2c/src/ir/adfa/prepare.cc b/re2c/src/ir/adfa/prepare.cc index bd51bb90..0856a0c3 100644 --- a/re2c/src/ir/adfa/prepare.cc +++ b/re2c/src/ir/adfa/prepare.cc @@ -181,24 +181,15 @@ void DFA::prepare () } } - // find backup states and create accept state (if needed) - if (default_state) - { - for (State * s = head; s; s = s->next) - { - if (s->rule) - { - for (uint32_t i = 0; i < s->go.nSpans; ++i) - { - if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE) - { - const uint32_t accept = static_cast<uint32_t> (accepts.find_or_add (rules[s->rule->rank])); - s->action.set_save (accept); - } - } + // bind save actions to fallback states and create accept state (if needed) + if (default_state) { + for (State *s = head; s; s = s->next) { + if (s->fallback) { + const uint32_t accept = static_cast<uint32_t>(accepts.find_or_add(rules[s->rule->rank])); + s->action.set_save(accept); } } - default_state->action.set_accept (&accepts); + default_state->action.set_accept(&accepts); } // split ``base'' states into two parts diff --git a/re2c/src/ir/compile.cc b/re2c/src/ir/compile.cc index 3516df25..cc058c39 100644 --- a/re2c/src/ir/compile.cc +++ b/re2c/src/ir/compile.cc @@ -61,8 +61,12 @@ smart_ptr compile (Spec & spec, Output & output, const std::string & cond, std::vector<size_t> fill; fillpoints(dfa, fill); + // see note [fallback states] + std::vector<size_t> fallback; + fallback_states(dfa, fallback); + // ADFA stands for 'DFA with actions' - DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line); + DFA *adfa = new DFA(dfa, fill, fallback, skeleton, cs, name, cond, line); /* * note [reordering DFA states] diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 32ea6f85..f58a0f03 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -52,6 +52,7 @@ enum dfa_minimization_t void
minimization(dfa_t &dfa); void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill); +void fallback_states(const dfa_t &dfa, std::vector<size_t> &fallback); } // namespace re2c diff --git a/re2c/src/ir/dfa/fallback.cc b/re2c/src/ir/dfa/fallback.cc new file mode 100644 index 00000000..8d3835f0 --- /dev/null +++ b/re2c/src/ir/dfa/fallback.cc @@ -0,0 +1,31 @@ +#include "src/ir/dfa/dfa.h" + +namespace re2c +{ + +/* note [fallback states] + * + * Find accepting states that may be shadowed by other accepting + * states: when the short rule matches, lexer must try to match + * longer rules; if this attempt is unsuccessful it must fall back + * to the short match. Each such state is recorded exactly once. + */ +void fallback_states(const dfa_t &dfa, std::vector<size_t> &fallback) +{ + const size_t count = dfa.states.size(); + for (size_t i = 0; i < count; ++i) { + dfa_state_t *s = dfa.states[i]; + if (s->rule != NULL) { + for (size_t c = 0; c < dfa.nchars; ++c) { + const size_t j = s->arcs[c]; + if (j != dfa_t::NIL && dfa.states[j]->rule == NULL) { + fallback.push_back(i); + break; + } + } + } + } +} + +} // namespace re2c + -- 2.40.0