From 4385e9d67fb7a64b7f6b4efb194a889d63309af4 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Wed, 9 Sep 2015 15:30:09 +0100 Subject: [PATCH] Make skeleton a part of DFA. This lets us create the skeleton right after DFA creation (but before the DFA has been mangled in different ways), yet call skeleton methods at any time. Undefined control flow is now checked at the time of real code generation; that is why all those tests that use '-r' changed: re2c stopped reporting warnings for 'rules:re2c' blocks and reports them for 'use:re2c' blocks instead. --- re2c/bootstrap/src/parse/parser.cc | 4 ++-- re2c/src/codegen/emit_dfa.cc | 6 ++++-- re2c/src/codegen/skeleton/generate_code.cc | 4 ++-- re2c/src/codegen/skeleton/skeleton.h | 5 ++--- re2c/src/ir/bytecode/bytecode.cc | 14 ++------------ re2c/src/ir/bytecode/bytecode.h | 2 +- re2c/src/ir/dfa/dfa.cc | 13 ++++++++++--- re2c/src/ir/dfa/dfa.h | 8 +++++--- re2c/src/parse/parser.ypp | 4 ++-- re2c/test/repeat-01.cgir.c | 8 ++++++-- re2c/test/repeat-02.cgir.c | 8 ++++++-- re2c/test/repeat-03.cgir.c | 2 -- re2c/test/repeat-04.cgir.c | 4 ++-- re2c/test/repeat-06.gir.c | 1 - re2c/test/repeat-07.gir.c | 1 - re2c/test/repeat-07_error.gir.c | 1 - re2c/test/reuse_conds_default_0.cgir.c | 2 -- re2c/test/reuse_conds_default_1.cgir.c | 1 - re2c/test/reuse_conds_setup_0.cgir.c | 2 -- re2c/test/reuse_conds_setup_1.cgir.c | 2 -- 20 files changed, 44 insertions(+), 48 deletions(-) diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index 6448acd8..3393999e 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -2778,7 +2778,7 @@ void parse(Scanner& i, Output & o) it->second = it->second ? 
mkAlt (def_rule, it->second) : def_rule; } - dfa_map[it->first] = genCode(it->second, o, it->first); + dfa_map[it->first] = genCode(it->second, o); } if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) { @@ -2805,7 +2805,7 @@ void parse(Scanner& i, Output & o) { if (parseMode != Scanner::Reuse) { - dfa_map[""] = genCode(spec, o, ""); + dfa_map[""] = genCode(spec, o); } if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) { diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 9ef8283e..aad95af2 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -115,9 +115,11 @@ void DFA::emit(Output & output, uint32_t& ind, const std::string& condName, bool head->action.set_initial (initial_label, head->action.type == Action::SAVE); // Generate prolog + skeleton->warn_undefined_control_flow (o.get_block_line (), condName); if (flag_skeleton) { - emit_prolog (o, ind, output.max_fill); + skeleton->emit_data (o.get_block_line (), condName, o.file_name); + Skeleton::emit_prolog (o, ind, output.max_fill); } if (bProlog) { @@ -233,7 +235,7 @@ void DFA::emit(Output & output, uint32_t& ind, const std::string& condName, bool } if (flag_skeleton) { - emit_epilog (o, ind); + Skeleton::emit_epilog (o, ind); } // Cleanup diff --git a/re2c/src/codegen/skeleton/generate_code.cc b/re2c/src/codegen/skeleton/generate_code.cc index 4f74340d..1c6421df 100644 --- a/re2c/src/codegen/skeleton/generate_code.cc +++ b/re2c/src/codegen/skeleton/generate_code.cc @@ -4,7 +4,7 @@ namespace re2c { -void emit_prolog (OutputFile & o, uint32_t ind, uint32_t maxfill) +void Skeleton::emit_prolog (OutputFile & o, uint32_t ind, uint32_t maxfill) { std::string yyctype; switch (encoding.szCodeUnit ()) @@ -60,7 +60,7 @@ void emit_prolog (OutputFile & o, uint32_t ind, uint32_t maxfill) o << indent (ind + 2) << "const YYCTYPE * token = cursor;\n"; } -void emit_epilog (OutputFile & o, uint32_t ind) +void Skeleton::emit_epilog 
(OutputFile & o, uint32_t ind) { o << indent (ind + 1) << "}\n"; o << "#undef " << mapCodeName["YYCTYPE"] << "\n"; diff --git a/re2c/src/codegen/skeleton/skeleton.h b/re2c/src/codegen/skeleton/skeleton.h index a9ae9f7f..ef11f470 100644 --- a/re2c/src/codegen/skeleton/skeleton.h +++ b/re2c/src/codegen/skeleton/skeleton.h @@ -57,6 +57,8 @@ struct Skeleton ~Skeleton (); void warn_undefined_control_flow (uint32_t line, const std::string & cond); void emit_data (uint32_t line, const std::string & cond, const char * fname); + static void emit_prolog (OutputFile & o, uint32_t ind, uint32_t maxfill); + static void emit_epilog (OutputFile & o, uint32_t ind); private: void generate_paths (uint32_t line, const std::string & cond, FILE * input, std::ofstream & keys); @@ -64,9 +66,6 @@ private: FORBID_COPY (Skeleton); }; -void emit_prolog (OutputFile & o, uint32_t ind, uint32_t maxfill); -void emit_epilog (OutputFile & o, uint32_t ind); - } // namespace re2c #endif // _RE2C_CODEGEN_SKELETON_SKELETON_ diff --git a/re2c/src/ir/bytecode/bytecode.cc b/re2c/src/ir/bytecode/bytecode.cc index c769f502..afb592dc 100644 --- a/re2c/src/ir/bytecode/bytecode.cc +++ b/re2c/src/ir/bytecode/bytecode.cc @@ -8,7 +8,7 @@ namespace re2c { static void optimize (Ins * i); -smart_ptr genCode (RegExp *re, Output & output, const std::string & cond) +smart_ptr genCode (RegExp *re, Output & output) { CharSet cs; re->split(cs); @@ -56,17 +56,7 @@ smart_ptr genCode (RegExp *re, Output & output, const std::string & cond) smart_ptr dfa = make_smart_ptr(new DFA(ins, size, 0, encoding.nCodeUnits(), rep)); - OutputFile & o = output.source; - - Skeleton skeleton (*dfa); - skeleton.warn_undefined_control_flow (o.get_block_line (), cond); - - if (flag_skeleton) - { - skeleton.emit_data (o.get_block_line (), cond, o.file_name); - } - - dfa->prepare (o, output.max_fill); + dfa->prepare (output.source, output.max_fill); return dfa; } diff --git a/re2c/src/ir/bytecode/bytecode.h b/re2c/src/ir/bytecode/bytecode.h 
index 93331949..d4a9f4ba 100644 --- a/re2c/src/ir/bytecode/bytecode.h +++ b/re2c/src/ir/bytecode/bytecode.h @@ -9,7 +9,7 @@ namespace re2c { -smart_ptr genCode (RegExp * re, Output & output, const std::string & cond); +smart_ptr genCode (RegExp * re, Output & output); } // namespace re2c diff --git a/re2c/src/ir/dfa/dfa.cc b/re2c/src/ir/dfa/dfa.cc index 91b53333..17df5a45 100644 --- a/re2c/src/ir/dfa/dfa.cc +++ b/re2c/src/ir/dfa/dfa.cc @@ -1,5 +1,6 @@ #include +#include "src/codegen/skeleton/skeleton.h" #include "src/ir/dfa/dfa.h" #include "src/ir/regexp/regexp_rule.h" #include "src/util/allocate.h" @@ -37,7 +38,9 @@ struct GoTo }; DFA::DFA(Ins *ins, uint32_t ni, uint32_t lb, uint32_t ub, const Char *rep) - : lbChar(lb) + : accepts () + , skeleton (NULL) + , lbChar(lb) , ubChar(ub) , nStates(0) , head(NULL) @@ -45,8 +48,6 @@ DFA::DFA(Ins *ins, uint32_t ni, uint32_t lb, uint32_t ub, const Char *rep) , toDo(NULL) , free_ins(ins) , free_rep(rep) - , accepts () - { Ins **work = new Ins * [ni + 1]; uint32_t nc = ub - lb; @@ -126,6 +127,10 @@ DFA::DFA(Ins *ins, uint32_t ni, uint32_t lb, uint32_t ub, const Char *rep) delete [] work; delete [] goTo; operator delete (span); + + // skeleton must be constructed after DFA construction + // but prior to any other DFA transformations + skeleton = new Skeleton (*this); } DFA::~DFA() @@ -139,6 +144,8 @@ DFA::~DFA() } delete [] free_ins; delete [] free_rep; + + delete skeleton; } void DFA::addState(State **a, State *s) diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 90acd5f7..49b228c6 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -8,8 +8,13 @@ namespace re2c { +struct Skeleton; + class DFA { + accept_t accepts; + Skeleton * skeleton; + public: uint32_t lbChar; uint32_t ubChar; @@ -20,9 +25,6 @@ public: const Ins * free_ins; const Char * free_rep; -protected: - accept_t accepts; - public: DFA (Ins *, uint32_t, uint32_t, uint32_t, const Char *); ~DFA (); diff --git a/re2c/src/parse/parser.ypp 
b/re2c/src/parse/parser.ypp index 43ad837f..22bbd52b 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -913,7 +913,7 @@ void parse(Scanner& i, Output & o) it->second = it->second ? mkAlt (def_rule, it->second) : def_rule; } - dfa_map[it->first] = genCode(it->second, o, it->first); + dfa_map[it->first] = genCode(it->second, o); } if (parseMode != Scanner::Rules && dfa_map.find(it->first) != dfa_map.end()) { @@ -940,7 +940,7 @@ void parse(Scanner& i, Output & o) { if (parseMode != Scanner::Reuse) { - dfa_map[""] = genCode(spec, o, ""); + dfa_map[""] = genCode(spec, o); } if (parseMode != Scanner::Rules && dfa_map.find("") != dfa_map.end()) { diff --git a/re2c/test/repeat-01.cgir.c b/re2c/test/repeat-01.cgir.c index 88c9b718..74fe8b42 100644 --- a/re2c/test/repeat-01.cgir.c +++ b/re2c/test/repeat-01.cgir.c @@ -1,5 +1,9 @@ -re2c: warning: line 13: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 13: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 22: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 22: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 34: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 34: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 46: control flow in condition 'r1' is undefined for strings that match 
'[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 46: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] /* Generated by re2c */ // multiple scanners diff --git a/re2c/test/repeat-02.cgir.c b/re2c/test/repeat-02.cgir.c index fd22eaa5..23b7cda8 100644 --- a/re2c/test/repeat-02.cgir.c +++ b/re2c/test/repeat-02.cgir.c @@ -1,5 +1,9 @@ -re2c: warning: line 13: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 13: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 20: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 20: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 32: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 32: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 44: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 44: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] /* Generated by re2c */ // multiple scanners diff --git a/re2c/test/repeat-03.cgir.c b/re2c/test/repeat-03.cgir.c index d6d021db..5277df8b 
100644 --- a/re2c/test/repeat-03.cgir.c +++ b/re2c/test/repeat-03.cgir.c @@ -1,5 +1,3 @@ -re2c: warning: line 14: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 14: control flow in condition 'r2' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 18: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 18: control flow in condition 'r2' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 28: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/repeat-04.cgir.c b/re2c/test/repeat-04.cgir.c index c4e18734..532e6b7a 100644 --- a/re2c/test/repeat-04.cgir.c +++ b/re2c/test/repeat-04.cgir.c @@ -1,3 +1,3 @@ -re2c: warning: line 14: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 14: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 18: control flow in condition 'r1' is undefined for strings that match '[\x0-\x30\x33-\x60\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] +re2c: warning: line 18: control flow in condition 'r2' is undefined for strings that match '[\x0-\x30\x33-\x61\x63-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: error: line 20, column 1: cannot have a second 'rules:re2c' block diff --git a/re2c/test/repeat-06.gir.c b/re2c/test/repeat-06.gir.c index 3d4b6b18..87b8fddb 100644 --- a/re2c/test/repeat-06.gir.c +++ b/re2c/test/repeat-06.gir.c @@ -1,4 +1,3 @@ -re2c: warning: line 14: control flow is undefined for 
strings that match '[\x0-\x60\x65-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 24: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 39: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 54: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/repeat-07.gir.c b/re2c/test/repeat-07.gir.c index 384698c0..4d8ba7d1 100644 --- a/re2c/test/repeat-07.gir.c +++ b/re2c/test/repeat-07.gir.c @@ -1,4 +1,3 @@ -re2c: warning: line 14: control flow is undefined for strings that match '[\x0-\x60\x65-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 27: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 45: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 63: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] diff --git a/re2c/test/repeat-07_error.gir.c b/re2c/test/repeat-07_error.gir.c index 63daf4a7..b91feac1 100644 --- a/re2c/test/repeat-07_error.gir.c +++ b/re2c/test/repeat-07_error.gir.c @@ -1,4 +1,3 @@ -re2c: warning: line 14: control flow is undefined for strings that match '[\x0-\x60\x65-\xFF]', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 27: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: warning: line 45: control flow is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: error: line 52, column 17: Cannot set UTF32 encoding: please reset USC2 encoding first diff --git a/re2c/test/reuse_conds_default_0.cgir.c b/re2c/test/reuse_conds_default_0.cgir.c index acf7550e..932b60ed 
100644 --- a/re2c/test/reuse_conds_default_0.cgir.c +++ b/re2c/test/reuse_conds_default_0.cgir.c @@ -1,3 +1 @@ -re2c: warning: line 10: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 10: control flow in condition 'r2' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: error: line 13, column 9: code to default rule 'r1' is already defined diff --git a/re2c/test/reuse_conds_default_1.cgir.c b/re2c/test/reuse_conds_default_1.cgir.c index 82f77304..87194fd8 100644 --- a/re2c/test/reuse_conds_default_1.cgir.c +++ b/re2c/test/reuse_conds_default_1.cgir.c @@ -1,2 +1 @@ -re2c: warning: line 17: control flow in condition 'r2' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: error: line 11, column 9: code to default rule 'r1' is already defined diff --git a/re2c/test/reuse_conds_setup_0.cgir.c b/re2c/test/reuse_conds_setup_0.cgir.c index 47807d22..8d084737 100644 --- a/re2c/test/reuse_conds_setup_0.cgir.c +++ b/re2c/test/reuse_conds_setup_0.cgir.c @@ -1,3 +1 @@ -re2c: warning: line 10: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 10: control flow in condition 'r2' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] re2c: error: line 13, column 9: code to setup rule 'r1' is already defined diff --git a/re2c/test/reuse_conds_setup_1.cgir.c b/re2c/test/reuse_conds_setup_1.cgir.c index f39bb25e..e927119a 100644 --- a/re2c/test/reuse_conds_setup_1.cgir.c +++ b/re2c/test/reuse_conds_setup_1.cgir.c @@ -1,3 +1 @@ -re2c: warning: line 17: control flow in condition 'r1' is undefined for strings that match '\xA', use default rule '*' [-Wundefined-control-flow] -re2c: warning: line 17: control flow in condition 'r2' is undefined for strings that match '\xA', use 
default rule '*' [-Wundefined-control-flow] re2c: error: line 11, column 9: code to setup rule 'r1' is already defined -- 2.40.0