From 2ad801e63600c2e3e4b4b58c16c96dbb1e3d9cf6 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 15 Nov 2016 17:09:53 +0000 Subject: [PATCH] Don't lose 'yyaccept' when fallback and initial states coincide. Before code generation each DFA state is assigned a specific action. Actions are mutually exclusive except for one: initial state may coincide with fallback state. The bug was in overriding fallback action with initial action (we lost 'yyaccept' which caused the wrong match). The bug was found with '--skeleton'. --- re2c/src/codegen/emit_action.cc | 18 +++++++--- re2c/src/ir/adfa/action.h | 54 +++++++++++++++-------------- re2c/src/ir/adfa/adfa.cc | 2 ++ re2c/test/yyaccept_initial.i.c | 61 +++++++++++++++++++++++++++++++++ re2c/test/yyaccept_initial.i.re | 11 ++++++ 5 files changed, 115 insertions(+), 31 deletions(-) create mode 100644 re2c/test/yyaccept_initial.i.c create mode 100644 re2c/test/yyaccept_initial.i.re diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 12f82a46..1f29245b 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -23,7 +23,7 @@ class label_t; static void need(OutputFile &o, uint32_t ind, bool &readCh, size_t n, bool bSetMarker); static void emit_match(OutputFile &o, uint32_t ind, bool &readCh, const State *s); -static void emit_initial(OutputFile &o, uint32_t ind, bool &readCh, const State *s, const std::set &used_labels); +static void emit_initial(OutputFile &o, uint32_t ind, bool &readCh, const State *s, const std::set &used_labels, bool save_yyaccept); static void emit_save(OutputFile &o, uint32_t ind, bool &readCh, const State *s, bool save_yyaccept); static void emit_accept_binary(OutputFile &o, uint32_t ind, bool &readCh, const DFA &dfa, const State *s, size_t l, size_t r); static void emit_accept(OutputFile &o, uint32_t ind, bool &readCh, const DFA &dfa, const State *s); @@ -37,15 +37,16 @@ static void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, 
const Rule void emit_action(OutputFile &o, uint32_t ind, bool &readCh, const DFA &dfa, const State *s, const std::set &used_labels) { + const bool save_yyaccept = dfa.accepts.size() > 1; switch (s->action.type) { case Action::MATCH: emit_match(o, ind, readCh, s); break; case Action::INITIAL: - emit_initial(o, ind, readCh, s, used_labels); + emit_initial(o, ind, readCh, s, used_labels, save_yyaccept); break; case Action::SAVE: - emit_save(o, ind, readCh, s, dfa.accepts.size() > 1); + emit_save(o, ind, readCh, s, save_yyaccept); break; case Action::MOVE: break; @@ -81,9 +82,17 @@ void emit_match(OutputFile &o, uint32_t ind, bool &readCh, const State *s) } void emit_initial(OutputFile &o, uint32_t ind, bool &readCh, - const State *s, const std::set &used_labels) + const State *s, const std::set &used_labels, bool save_yyaccept) { + const Initial &initial = *s->action.info.initial; + if (used_labels.count(s->label)) { + const size_t save = initial.save; + if (save_yyaccept && save != Initial::NOSAVE) { + o.wind(ind).wstring(opts->yyaccept).ws(" = ") + .wu64(save).ws(";\n"); + } + if (s->fill != 0) { o.wstring(opts->input_api.stmt_skip(ind)); } else { @@ -91,7 +100,6 @@ void emit_initial(OutputFile &o, uint32_t ind, bool &readCh, } } - const Initial &initial = *s->action.info.initial; if (used_labels.count(initial.label)) { o.wstring(opts->labelPrefix).wlabel(initial.label).ws(":\n"); } diff --git a/re2c/src/ir/adfa/action.h b/re2c/src/ir/adfa/action.h index b7c048a0..8e0ae14e 100644 --- a/re2c/src/ir/adfa/action.h +++ b/re2c/src/ir/adfa/action.h @@ -1,6 +1,7 @@ #ifndef _RE2C_IR_ADFA_ACTION_ #define _RE2C_IR_ADFA_ACTION_ +#include #include #include "src/codegen/label.h" @@ -15,12 +16,16 @@ class State; struct Initial { + static const size_t NOSAVE; + label_t label; bool setMarker; + size_t save; - inline Initial (label_t l, bool b) + inline Initial (label_t l, bool b, size_t s) : label (l) , setMarker (b) + , save (s) {} }; @@ -53,54 +58,51 @@ public: {} ~Action () { - 
clear (); + if (type == INITIAL) { + delete info.initial; + } } void set_initial (label_t label, bool used_marker) { - clear (); - type = INITIAL; - info.initial = new Initial (label, used_marker); + if (type == MATCH) { + // ordinary state with no special action + type = INITIAL; + info.initial = new Initial(label, used_marker, Initial::NOSAVE); + } else if (type == SAVE) { + // fallback state: do not loose 'yyaccept' + type = INITIAL; + info.initial = new Initial(label, used_marker, info.save); + } else if (type == INITIAL) { + // already marked as initial, probably reuse mode + info.initial->label = label; + info.initial->setMarker = used_marker; + } else { + assert(false); + } } void set_save (size_t save) { - clear (); + assert(type == MATCH); type = SAVE; info.save = save; } void set_move () { - clear (); + assert(type == MATCH); type = MOVE; } void set_accept (const accept_t * accepts) { - clear (); + assert(type == MATCH); type = ACCEPT; info.accepts = accepts; } void set_rule (size_t rule) { - clear (); + assert(type == MATCH); type = RULE; info.rule = rule; } - -private: - void clear () - { - switch (type) - { - case INITIAL: - delete info.initial; - break; - case MATCH: - case SAVE: - case MOVE: - case ACCEPT: - case RULE: - break; - } - } }; } // namespace re2c diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 0bf795df..913de658 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -15,6 +15,8 @@ namespace re2c { +const size_t Initial::NOSAVE = std::numeric_limits::max(); + DFA::DFA ( const dfa_t &dfa , const std::vector &fill diff --git a/re2c/test/yyaccept_initial.i.c b/re2c/test/yyaccept_initial.i.c new file mode 100644 index 00000000..97fff034 --- /dev/null +++ b/re2c/test/yyaccept_initial.i.c @@ -0,0 +1,61 @@ +/* Generated by re2c */ +// In this example initial state coincides with fallback state +// which must update 'yyaccept'. 
There is a loop through initial +// state that changes 'yyaccept' value, so if the initial state +// forgets to update 'yyaccept', the wrong rule will match. + + +{ + YYCTYPE yych; + unsigned int yyaccept = 0; + goto yy0; +yy1: + yyaccept = 0; + ++YYCURSOR; +yy0: + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *(YYMARKER = YYCURSOR); + switch (yych) { + case 'a': goto yy3; + default: goto yy2; + } +yy2: + { 1 } +yy3: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy5; + default: goto yy4; + } +yy4: + YYCURSOR = YYMARKER; + if (yyaccept == 0) { + goto yy2; + } else { + goto yy6; + } +yy5: + yyaccept = 1; + YYMARKER = ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'a': goto yy7; + default: goto yy6; + } +yy6: + { 2 } +yy7: + ++YYCURSOR; + if (YYLIMIT <= YYCURSOR) YYFILL(1); + yych = *YYCURSOR; + switch (yych) { + case 'b': goto yy1; + default: goto yy4; + } +} + +re2c: warning: line 8: rule matches empty string [-Wmatch-empty-string] +re2c: warning: line 9: rule matches empty string [-Wmatch-empty-string] diff --git a/re2c/test/yyaccept_initial.i.re b/re2c/test/yyaccept_initial.i.re new file mode 100644 index 00000000..b1a1b794 --- /dev/null +++ b/re2c/test/yyaccept_initial.i.re @@ -0,0 +1,11 @@ +// In this example initial state coincides with fallback state +// which must update 'yyaccept'. There is a loop through initial +// state that changes 'yyaccept' value, so if the initial state +// forgets to update 'yyaccept', the wrong rule will match. + +/*!re2c + + "abab"* { 1 } + "ab"* { 2 } + +*/ -- 2.50.0