src/ir/nfa/regexps2nfa.cc \
src/ir/adfa/adfa.cc \
src/ir/adfa/prepare.cc \
+ src/ir/dfa/dead_rules.cc \
src/ir/dfa/determinization.cc \
src/ir/dfa/fallback.cc \
src/ir/dfa/fillpoints.cc \
src/ir/skeleton/generate_data.cc \
src/ir/skeleton/maxpath.cc \
src/ir/skeleton/skeleton.cc \
- src/ir/skeleton/unreachable.cc \
src/ir/tag.cc \
src/ir/tagpool.cc \
src/main.cc \
head->action.set_initial (initial_label, head->action.type == Action::SAVE);
warn_undefined_control_flow(*skeleton);
- warn_unreachable(*skeleton);
if (opts->target == opt_t::SKELETON) {
if (output.skeletons.insert (name).second)
// but prior to any other DFA transformations
Skeleton *skeleton = new Skeleton(dfa, cs, defrule, name, cond, line);
+ cutoff_dead_rules(dfa, defrule, cond);
+
// try to minimize the number of tag variables
const size_t used_tags = deduplicate_tags(dfa);
--- /dev/null
+#include "src/conf/opt.h"
+#include "src/ir/dfa/dfa.h"
+#include "src/util/forbid_copy.h"
+#include "src/globals.h"
+
+namespace re2c
+{
+
+/* note [unreachable rules]
+ *
+ * DFA may contain useless final states. Such states may
+ * appear as a result of:
+ * - (part of) one rule being shadowed by another rule,
+ * e.g. rule [ab] partially shadows [ac] and completely
+ * shadows [a]
+ *
+ * - infinite rules that greedily eat all input characters
+ * and never stop (they either fail on YYFILL or crash),
+ * e.g. [^]*
+ *
+ * - rules that contain a never-matching link, e.g. '[]'
+ * with option '--empty-class match-none'
+ *
+ * Useless final states should be eliminated so that they
+ * don't interfere with further analyses and optimizations.
+ * If all final states of a rule are useless, then the whole
+ * rule is unreachable and should be reported.
+ *
+ * In order to find out if a given final state is useless,
+ * we have to find out if all outgoing paths from this state
+ * match longer rules (otherwise, some paths go to default
+ * state and fall back to this state). We do this by finding
+ * all states that have transitions to default state and
+ * back-propagating "none-rule" from these states. As the back
+ * propagation meets the first final state on its way, it
+ * substitutes "none-rule" with the corresponding rule,
+ * which is further propagated back to the start state of DFA.
+ */
+
+// reversed DFA: each state keeps a list of the states that have a transition into it in the original DFA
+struct rdfa_t
+{
+ struct arc_t
+ {
+ size_t dest; // index of the state this reversed arc leads to (the source of the original transition)
+ arc_t *next; // next arc in the owning state's list, NULL at the end of the list
+ };
+
+ struct state_t
+ {
+ arc_t *arcs; // head of the list of reversed arcs, NULL if no state has a transition into this one
+ size_t rule; // rule of this state; the value 'nrules' stands for Rule::NONE
+ bool fallthru; // true if at least one symbol has no transition (dfa_t::NIL) from this state in the original DFA
+ };
+
+ size_t nstates; // number of states, same as in the original DFA
+ size_t nrules; // number of rules; also serves as the index of "none-rule"
+ state_t *states; // array of 'nstates' states, released in the destructor
+ arc_t *arcs; // pool of 'nstates * dfa.nchars' arcs that the per-state lists are carved from, released in the destructor
+
+ explicit rdfa_t(const dfa_t &dfa)
+ : nstates(dfa.states.size())
+ , nrules(dfa.rules.size())
+ , states(new state_t[nstates]())
+ , arcs(new arc_t[nstates * dfa.nchars])
+ {
+ // init states: empty arc lists, Rule::NONE remapped to 'nrules', fallthru cleared
+ for (size_t i = 0; i < nstates; ++i) {
+ state_t &s = states[i];
+ s.arcs = NULL;
+ const size_t r = dfa.states[i]->rule;
+ s.rule = r == Rule::NONE ? nrules : r;
+ s.fallthru = false;
+ }
+ // init arcs: every transition i -> j of the original DFA becomes an arc j -> i, prepended to the list of j
+ arc_t *a = arcs; // next unused arc in the pool
+ for (size_t i = 0; i < nstates; ++i) {
+ dfa_state_t *s = dfa.states[i];
+ for (size_t c = 0; c < dfa.nchars; ++c) {
+ const size_t j = s->arcs[c];
+ if (j != dfa_t::NIL) {
+ a->dest = i;
+ a->next = states[j].arcs;
+ states[j].arcs = a++;
+ } else {
+ states[i].fallthru = true; // missing transition on symbol 'c': state 'i' becomes a seed for back propagation
+ }
+ }
+ }
+ }
+
+ ~rdfa_t()
+ {
+ delete[] states;
+ delete[] arcs;
+ }
+
+ FORBID_COPY(rdfa_t); // NOTE(review): presumably disables copying, which matters because the raw arrays are freed in the destructor -- confirm against forbid_copy.h
+};
+
+static void backprop(const rdfa_t &rdfa,
+ bool *reachable,
+ size_t rule,
+ size_t state)
+{ // back propagation step: mark the (possibly substituted) rule in the row of 'state', then recurse into every predecessor of 'state'
+ // "none-rule" (index 'nrules') is unreachable from final states: replace it with the rule of
+ // the current state before propagating (for a non-final state this leaves 'nrules' unchanged)
+ const rdfa_t::state_t &s = rdfa.states[state];
+ if (rule == rdfa.nrules) {
+ rule = s.rule;
+ }
+
+ // if the rule has already been set, then either it's a loop
+ // or another branch of back propagation has already been here,
+ // in both cases we should stop: there's nothing new to propagate
+ bool &reach = reachable[state * (rdfa.nrules + 1) + rule]; // table holds one row of 'nrules + 1' flags per state
+ if (reach) return;
+ reach = true;
+
+ for (const rdfa_t::arc_t *a = s.arcs; a; a = a->next) {
+ backprop(rdfa, reachable, rule, a->dest); // 'a->dest' is a state with a transition into 'state' in the original DFA
+ }
+}
+
+static void calc_reachable(const rdfa_t &rdfa, bool *reachable)
+{ // fills 'reachable' by starting back propagation at every state that has a missing transition
+ for (size_t i = 0; i < rdfa.nstates; ++i) {
+ const rdfa_t::state_t &s = rdfa.states[i];
+ if (s.fallthru) {
+ backprop(rdfa, reachable, s.rule, i); // seed with the state's own rule ('nrules' if the state has none)
+ }
+ }
+}
+
+static void warn_unreachable(const dfa_t &dfa, size_t defrule,
+ const std::string &cond, const bool *reachable)
+{ // records shadowing rules for final states whose own rule is unmarked, then warns about every unmarked rule except 'defrule'
+ const size_t nstates = dfa.states.size();
+ const size_t nrules = dfa.rules.size();
+
+ for (size_t i = 0; i < nstates; ++i) {
+ const bool *reach = &reachable[i * (nrules + 1)]; // row of 'nrules + 1' flags that belongs to state 'i'
+ const size_t r = dfa.states[i]->rule;
+ if (r != Rule::NONE && !reach[r]) { // final state whose own rule is not marked in its row
+ // skip last rule (it's the NONE-rule)
+ for (size_t j = 0; j < nrules; ++j) {
+ if (reach[j]) {
+ dfa.rules[r].shadow.insert(dfa.rules[j].info->loc.line); // remember the source line of each rule that is marked instead
+ }
+ }
+ }
+ }
+
+ for (size_t i = 0; i < nrules; ++i) {
+ // default rule '*' should not be reported
+ if (i != defrule && !reachable[i]) { // 'reachable[i]' is the flag of rule 'i' in the row of state 0 -- NOTE(review): presumably state 0 is the start state, confirm
+ warn.unreachable_rule(cond, dfa.rules[i]);
+ }
+ }
+}
+
+void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond)
+{ // builds the reversed DFA, computes per-state rule flags and reports unreachable rules; nothing is removed from 'dfa' in this function
+ const rdfa_t rdfa(dfa);
+ bool *reachable = new bool[rdfa.nstates * (rdfa.nrules + 1)](); // one flag per (state, rule or "none-rule") pair, value-initialized to false
+
+ calc_reachable(rdfa, reachable);
+ warn_unreachable(dfa, defrule, cond, reachable);
+
+ delete[] reachable;
+}
+
+} // namespace re2c
+
void minimization(dfa_t &dfa);
void fillpoints(const dfa_t &dfa, std::vector<size_t> &fill);
bool fallback_state(const dfa_t &dfa, size_t i);
+void cutoff_dead_rules(dfa_t &dfa, size_t defrule, const std::string &cond);
size_t deduplicate_tags(dfa_t &dfa);
} // namespace re2c
size_t trail;
size_t tags;
std::set<uint32_t> shadow;
- bool reachable;
Rule()
: info(NULL)
, trail(Tag::NONE)
, tags(0)
, shadow()
- , reachable(false)
{}
FORBID_COPY(Rule);
uint32_t maxpath(const Skeleton &skel);
void warn_undefined_control_flow(const Skeleton &skel);
void fprint_default_path(FILE *f, const Skeleton &skel, const path_t &p);
-void warn_unreachable(const Skeleton &skel);
void emit_data(const Skeleton &skel);
void emit_prolog(OutputFile & o);
void emit_start(const Skeleton &skel, OutputFile &o, size_t maxfill,
+++ /dev/null
-#include "src/util/c99_stdint.h"
-#include <set>
-
-#include "src/conf/warn.h"
-#include "src/globals.h"
-#include "src/ir/skeleton/path.h"
-#include "src/ir/skeleton/skeleton.h"
-
-namespace re2c
-{
-
-static void calc_reachable(
- const Skeleton &skel,
- std::vector<uint8_t> &loops,
- std::vector<std::set<size_t> > &reachs,
- size_t i)
-{
- const Node &node = skel.nodes[i];
- uint8_t &loop = loops[i];
- std::set<size_t> &reach = reachs[i];
-
- if (!reach.empty()) {
- return;
- } else if (node.end()) {
- reach.insert(node.rule);
- } else if (loop < 2) {
- local_inc _(loop);
- Node::arcs_t::const_iterator
- arc = node.arcs.begin(),
- end = node.arcs.end();
- for (; arc != end; ++arc) {
- const size_t j = arc->first;
- calc_reachable(skel, loops, reachs, j);
- reach.insert(reachs[j].begin(), reachs[j].end());
- }
- }
-}
-
-void warn_unreachable(const Skeleton &skel)
-{
- // calculate reachable rules
- const size_t nnodes = skel.nodes_count;
- std::vector<uint8_t> loops(nnodes);
- std::vector<std::set<size_t> > reachs(nnodes);
- calc_reachable(skel, loops, reachs, 0);
-
- std::valarray<Rule> &rules = skel.rules;
- const size_t nrules = rules.size();
-
- for (size_t i = 0; i < nnodes; ++i) {
- const size_t r1 = skel.nodes[i].rule;
- if (r1 == Rule::NONE) {
- continue;
- }
- std::set<size_t>::const_iterator
- rule = reachs[i].begin(),
- end = reachs[i].end();
- for (; rule != end; ++rule) {
- const size_t r2 = *rule;
- if (r2 == Rule::NONE || r1 == r2) {
- rules[r1].reachable = true;
- } else {
- rules[r1].shadow.insert(rules[r2].info->loc.line);
- }
- }
- }
-
- // warn about unreachable rules:
- // - rules that are shadowed by other rules, e.g. rule '[a]' is shadowed by '[a] [^]'
- // - infinite rules that consume infinitely many characters and fail on YYFILL, e.g. '[^]*'
- // - rules that contain never-matching link, e.g. '[]' with option '--empty-class match-none'
- // default rule '*' should not be reported
- for (size_t i = 0; i < nrules; ++i) {
- const Rule &rule = rules[i];
- if (i != skel.defrule && !rule.reachable) {
- warn.unreachable_rule(skel.cond, rule);
- }
- }
-}
-
-} // namespace re2c
}
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 6: empty character class [-Wempty-character-class]
-re2c: warning: line 7: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 6: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 7: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 10: empty character class [-Wempty-character-class]
-re2c: warning: line 11: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 10: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 11: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 14: empty character class [-Wempty-character-class]
re2c: warning: line 14: empty character class [-Wempty-character-class]
re2c: warning: line 14: empty character class [-Wempty-character-class]
-re2c: warning: line 15: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 14: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 15: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 18: empty character class [-Wempty-character-class]
re2c: warning: line 18: empty character class [-Wempty-character-class]
re2c: warning: line 18: empty character class [-Wempty-character-class]
-re2c: warning: line 19: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 18: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 19: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
}
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
}
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
}
re2c: warning: line 2: empty character class [-Wempty-character-class]
-re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 2: unreachable rule [-Wunreachable-rules]
+re2c: warning: line 3: control flow is undefined for strings that match '', use default rule '*' [-Wundefined-control-flow]
re2c: warning: line 384: column 32: escape has no effect: '\.' [-Wuseless-escape]
re2c: warning: line 391: column 27: escape has no effect: '\[' [-Wuseless-escape]
re2c: warning: line 392: column 11: escape has no effect: '\[' [-Wuseless-escape]
+re2c: warning: line 648: unreachable rule in condition 'INITIAL' (shadowed by rules at lines 406, 481, 491, 555, 627, 632, 637) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_DOUBLE_QUOTES' (shadowed by rules at lines 582, 587) [-Wunreachable-rules]
re2c: warning: line 623: unreachable rule in condition 'ST_OFFSET' (shadowed by rule at line 573) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_RAW' (shadowed by rules at lines 500, 541, 643) [-Wunreachable-rules]
re2c: warning: line 384: column 32: escape has no effect: '\.' [-Wuseless-escape]
re2c: warning: line 391: column 27: escape has no effect: '\[' [-Wuseless-escape]
re2c: warning: line 392: column 11: escape has no effect: '\[' [-Wuseless-escape]
+re2c: warning: line 648: unreachable rule in condition 'INITIAL' (shadowed by rules at lines 406, 481, 491, 555, 627, 632, 637) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_DOUBLE_QUOTES' (shadowed by rules at lines 582, 587) [-Wunreachable-rules]
re2c: warning: line 623: unreachable rule in condition 'ST_OFFSET' (shadowed by rule at line 573) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_RAW' (shadowed by rules at lines 500, 541, 643) [-Wunreachable-rules]
re2c: warning: line 384: column 32: escape has no effect: '\.' [-Wuseless-escape]
re2c: warning: line 391: column 27: escape has no effect: '\[' [-Wuseless-escape]
re2c: warning: line 392: column 11: escape has no effect: '\[' [-Wuseless-escape]
+re2c: warning: line 648: unreachable rule in condition 'INITIAL' (shadowed by rules at lines 406, 481, 491, 555, 627, 632, 637) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_DOUBLE_QUOTES' (shadowed by rules at lines 582, 587) [-Wunreachable-rules]
re2c: warning: line 623: unreachable rule in condition 'ST_OFFSET' (shadowed by rule at line 573) [-Wunreachable-rules]
re2c: warning: line 648: unreachable rule in condition 'ST_RAW' (shadowed by rules at lines 500, 541, 643) [-Wunreachable-rules]
re2c: warning: line 4: column 32: escape has no effect: '\.' [-Wuseless-escape]
re2c: warning: line 11: column 27: escape has no effect: '\[' [-Wuseless-escape]
re2c: warning: line 12: column 11: escape has no effect: '\[' [-Wuseless-escape]
+re2c: warning: line 55: unreachable rule in condition 'INITIAL' (shadowed by rules at lines 24, 35, 36, 42, 51, 52, 53) [-Wunreachable-rules]
re2c: warning: line 55: unreachable rule in condition 'ST_DOUBLE_QUOTES' (shadowed by rules at lines 48, 49) [-Wunreachable-rules]
re2c: warning: line 50: unreachable rule in condition 'ST_OFFSET' (shadowed by rule at line 46) [-Wunreachable-rules]
re2c: warning: line 55: unreachable rule in condition 'ST_RAW' (shadowed by rules at lines 37, 39, 54) [-Wunreachable-rules]
re2c: warning: line 4: column 32: escape has no effect: '\.' [-Wuseless-escape]
re2c: warning: line 11: column 27: escape has no effect: '\[' [-Wuseless-escape]
re2c: warning: line 12: column 11: escape has no effect: '\[' [-Wuseless-escape]
+re2c: warning: line 55: unreachable rule in condition 'INITIAL' (shadowed by rules at lines 24, 35, 36, 42, 51, 52, 53) [-Wunreachable-rules]
re2c: warning: line 55: unreachable rule in condition 'ST_DOUBLE_QUOTES' (shadowed by rules at lines 48, 49) [-Wunreachable-rules]
re2c: warning: line 50: unreachable rule in condition 'ST_OFFSET' (shadowed by rule at line 46) [-Wunreachable-rules]
re2c: warning: line 55: unreachable rule in condition 'ST_RAW' (shadowed by rules at lines 37, 39, 54) [-Wunreachable-rules]
}
re2c: warning: line 253: rule matches empty string [-Wmatch-empty-string]
re2c: warning: line 288: empty character class [-Wempty-character-class]
+re2c: warning: line 289: unreachable rule [-Wunreachable-rules]
re2c: warning: line 290: control flow is undefined for strings that match
'[\x0-\x8\xA-\xC\xE-\x1F\x21-\x2E\x30-\xFF]'
'\x2F [\x0-\x29\x2B-\xFF]'
, use default rule '*' [-Wundefined-control-flow]
-re2c: warning: line 289: unreachable rule [-Wunreachable-rules]
--- /dev/null
+/* Generated by re2c */
+
+{
+ YYCTYPE yych;
+ if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy4;
+ default: goto yy2;
+ }
+yy2:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy7;
+ { 1 }
+yy4:
+ ++YYCURSOR;
+ switch ((yych = *YYCURSOR)) {
+ case 'a': goto yy8;
+ default: goto yy5;
+ }
+yy5:
+ { 0 }
+yy6:
+ ++YYCURSOR;
+ if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+yy7:
+ switch (yych) {
+ case 'a': goto yy4;
+ default: goto yy6;
+ }
+yy8:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy5;
+}
+
+
+
+{
+ YYCTYPE yych;
+ if ((YYLIMIT - YYCURSOR) < 4) YYFILL(4);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy13;
+ default: goto yy11;
+ }
+yy11:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy16;
+ { 1 }
+yy13:
+ ++YYCURSOR;
+ switch ((yych = *YYCURSOR)) {
+ case 'a': goto yy17;
+ default: goto yy14;
+ }
+yy14:
+ { 0 }
+yy15:
+ ++YYCURSOR;
+ if ((YYLIMIT - YYCURSOR) < 3) YYFILL(3);
+ yych = *YYCURSOR;
+yy16:
+ switch (yych) {
+ case 'a': goto yy13;
+ default: goto yy15;
+ }
+yy17:
+ yych = *++YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy18;
+ default: goto yy14;
+ }
+yy18:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy14;
+}
+
+
+
+{
+ YYCTYPE yych;
+ goto yy19;
+yy20:
+ ++YYCURSOR;
+yy19:
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy23;
+ default: goto yy20;
+ }
+yy22:
+ { 0 }
+yy23:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy22;
+}
+
+
+
+{
+ YYCTYPE yych;
+ if ((YYLIMIT - YYCURSOR) < 2) YYFILL(2);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy28;
+ default: goto yy26;
+ }
+yy26:
+ ++YYCURSOR;
+ yych = *YYCURSOR;
+ goto yy32;
+ { 1 }
+yy28:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+ switch (yych) {
+ case 'a': goto yy28;
+ default: goto yy30;
+ }
+yy30:
+ { 0 }
+yy31:
+ ++YYCURSOR;
+ if (YYLIMIT <= YYCURSOR) YYFILL(1);
+ yych = *YYCURSOR;
+yy32:
+ switch (yych) {
+ case 'a': goto yy28;
+ default: goto yy31;
+ }
+}
+
+re2c: warning: line 3: unreachable rule (shadowed by rule at line 2) [-Wunreachable-rules]
+re2c: warning: line 8: unreachable rule (shadowed by rule at line 7) [-Wunreachable-rules]
+re2c: warning: line 12: rule matches empty string [-Wmatch-empty-string]
+re2c: warning: line 13: unreachable rule (shadowed by rule at line 12) [-Wunreachable-rules]
+re2c: warning: line 18: unreachable rule (shadowed by rule at line 17) [-Wunreachable-rules]
--- /dev/null
+/*!re2c
+ [^a]* ("aa"|"a") { 0 }
+ [^] { 1 }
+*/
+
+/*!re2c
+ [^a]* "a"{1,3} { 0 }
+ [^] { 1 }
+*/
+
+/*!re2c
+ [^a]* "a"? { 0 }
+ [^] { 1 }
+*/
+
+/*!re2c
+ [^a]* "a"+ { 0 }
+ [^] { 1 }
+*/