src/ir/adfa/adfa.cc \
src/ir/adfa/prepare.cc \
src/ir/dfa/determinization.cc \
+ src/ir/dfa/fallback.cc \
src/ir/dfa/fillpoints.cc \
src/ir/dfa/minimization.cc \
src/ir/regexp/encoding/enc.cc \
DFA::DFA
( const dfa_t &dfa
, const std::vector<size_t> &fill
+ , const std::vector<size_t> &fallback
, Skeleton *skel
, const charset_t &charset
, const std::string &n
}
*p = NULL;
+ for (size_t i = 0; i < fallback.size(); ++i) {
+ i2s[fallback[i]]->fallback = true;
+ }
+
delete[] i2s;
}
const RuleInfo * rule;
State * next;
size_t fill;
+ bool fallback;
bool isPreCtxt;
bool isBase;
, rule (NULL)
, next (0)
, fill (0)
+ , fallback (false)
, isPreCtxt (false)
, isBase (false)
, go ()
public:
DFA ( const dfa_t &dfa
, const std::vector<size_t> &fill
+ , const std::vector<size_t> &fallback
, Skeleton *skel
, const charset_t &charset
, const std::string &n
}
}
- // find backup states and create accept state (if needed)
- if (default_state)
- {
- for (State * s = head; s; s = s->next)
- {
- if (s->rule)
- {
- for (uint32_t i = 0; i < s->go.nSpans; ++i)
- {
- if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE)
- {
- const uint32_t accept = static_cast<uint32_t> (accepts.find_or_add (rules[s->rule->rank]));
- s->action.set_save (accept);
- }
- }
+ // bind save actions to fallback states and create accept state (if needed)
+ if (default_state) {
+ for (State *s = head; s; s = s->next) {
+ if (s->fallback) {
+ const uint32_t accept = static_cast<uint32_t>(accepts.find_or_add(rules[s->rule->rank]));
+ s->action.set_save(accept);
}
}
- default_state->action.set_accept (&accepts);
+ default_state->action.set_accept(&accepts);
}
// split ``base'' states into two parts
std::vector<size_t> fill;
fillpoints(dfa, fill);
+ // see note [fallback states]
+ std::vector<size_t> fallback;
+ fallback_states(dfa, fallback);
+
// ADFA stands for 'DFA with actions'
- DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);
+ DFA *adfa = new DFA(dfa, fill, fallback, skeleton, cs, name, cond, line);
/*
* note [reordering DFA states]
void minimization(dfa_t &dfa);
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
+void fallback_states(const dfa_t &dfa, std::vector<size_t> &fallback);
} // namespace re2c
--- /dev/null
+#include "src/ir/dfa/dfa.h"
+
+namespace re2c
+{
+
+/* note [fallback states]
+ *
+ * Find accepting states that may be shadowed by other accepting
+ * states: when a short rule matches, the lexer must still try to
+ * match longer rules; if that attempt fails, it must fall back
+ * to the short match.
+ */
+// Appends to 'fallback' the index of every state of 'dfa' that has a
+// rule (s->rule != NULL) and at least one arc leading to a state
+// without a rule. Indices are appended in increasing order and each
+// state is recorded at most once. 'fallback' is not cleared first.
+void fallback_states(const dfa_t &dfa, std::vector<size_t> &fallback)
+{
+    const size_t count = dfa.states.size();
+    for (size_t i = 0; i < count; ++i) {
+        const dfa_state_t *s = dfa.states[i];
+        if (s->rule == NULL) {
+            // states without a rule are never recorded
+            continue;
+        }
+        for (size_t c = 0; c < dfa.nchars; ++c) {
+            const size_t j = s->arcs[c];
+            if (j != dfa_t::NIL
+                && dfa.states[j]->rule == NULL) {
+                fallback.push_back(i);
+                // one qualifying arc is enough: stop scanning so the
+                // state is not appended again for every other symbol
+                // that also leads to a state without a rule
+                break;
+            }
+        }
+    }
+}
+
+} // namespace re2c
+