From: Ulya Trofimovich Date: Tue, 27 Dec 2016 21:04:09 +0000 (+0000) Subject: Delay code generation for condition dispatch. X-Git-Tag: 1.0~39^2~156 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b79ab68097d3c7a6421c0c7003d1cd8a026d9100;p=re2c Delay code generation for condition dispatch. --- diff --git a/re2c/bootstrap/src/parse/lex.cc b/re2c/bootstrap/src/parse/lex.cc index 2ba20d90..79a043a8 100644 --- a/re2c/bootstrap/src/parse/lex.cc +++ b/re2c/bootstrap/src/parse/lex.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 0.16 on Tue Dec 27 16:57:27 2016 */ +/* Generated by re2c 0.16 on Tue Dec 27 20:54:24 2016 */ #line 1 "../src/parse/lex.re" #include "src/util/c99_stdint.h" #include diff --git a/re2c/bootstrap/src/parse/parser.cc b/re2c/bootstrap/src/parse/parser.cc index db5eee84..45e1cb94 100644 --- a/re2c/bootstrap/src/parse/parser.cc +++ b/re2c/bootstrap/src/parse/parser.cc @@ -2066,7 +2066,6 @@ void parse(Scanner &input, Output & o) Enc encodingOld = opts->encoding; for (Scanner::ParseMode mode; (mode = input.echo()) != Scanner::Stop;) { o.source.new_block (); - bool bPrologBrace = false; input.save_state(curr_state); if (opts->rFlag && mode == Scanner::Rules && !dfas.empty()) @@ -2122,19 +2121,12 @@ void parse(Scanner &input, Output & o) } } - for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { - const std::string &c = (*i)->cond; - if (c != "") { - o.source.block().types.push_back(c); - } - } - // generate code if (mode != Scanner::Rules) { + bool prolog = false; uint32_t ind = opts->topIndent; - size_t nCount = dfas.size(); for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { - (*i)->emit(o, ind, !--nCount, bPrologBrace); + (*i)->emit(o, ind, (i + 1) == dfas.end(), prolog); } } diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 5f308c37..11adcff9 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -20,18 +20,7 @@ namespace re2c { -static std::string 
genGetCondition (Opt &opts); -static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax); -static void genCondTable (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames); -static void genCondGoto (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames); -static void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label); - -std::string genGetCondition(Opt &opts) -{ - return opts->cond_get_naked - ? opts->cond_get - : opts->cond_get + "()"; -} +static void emit_state(OutputFile & o, uint32_t ind, const State * s, bool used_label); void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label) { @@ -91,25 +80,14 @@ void DFA::emit_body(OutputFile &o, uint32_t& ind, } } -void DFA::emit_dot( - OutputFile &o, - bool last_cond, - const std::vector<std::string> &conds) const +void DFA::emit_dot(OutputFile &o, bool last_cond) const { Opt &opts = o.opts; if (!opts->cFlag || !o.cond_goto) { o.ws("digraph re2c {\n"); } + o.wdelay_cond_goto(0); if (opts->cFlag) { - if (!o.cond_goto) { - for (size_t i = 0; i < conds.size(); ++i) { - const std::string &cond = conds[i]; - o.ws("0 -> ").wstring(cond) - .ws(" [label=\"state=") - .wstring(cond).ws("\"]\n"); - } - o.cond_goto = true; - } o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n"); } for (State *s = head; s; s = s->next) { @@ -142,6 +120,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra OutputFile &o = output.source; OutputBlock &ob = o.block(); Opt &opts = o.opts; + std::set tagnames, tagvars; if (!oldstyle_ctxmarker) { for (size_t i = 0; i < vartags.size(); ++i) { @@ -161,6 +140,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra } ob.tags.insert(tagnames.begin(), tagnames.end()); } + if (!cond.empty()) o.block().types.push_back(cond); bool bProlog = (!opts->cFlag || !o.cond_goto); @@ -192,7 +172,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool
isLastCond, bool& bPrologBra emit_end(o, name, need_backup, oldstyle_ctxmarker); } } else if (opts->target == opt_t::DOT) { - emit_dot(o, isLastCond, ob.types); + emit_dot(o, isLastCond); } else { // Generate prolog if (bProlog) @@ -231,10 +211,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra } if (bProlog) { - if (opts->cFlag && !o.cond_goto && opts->gFlag) - { - genCondTable(o, ind, ob.types); - } + o.wdelay_cond_table(ind); o.wdelay_state_goto (ind); if (opts->cFlag) { @@ -244,10 +221,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra } } o.wuser_start_label (); - if (opts->cFlag && !o.cond_goto) - { - genCondGoto(o, ind, ob.types); - } + o.wdelay_cond_goto(ind); } if (opts->cFlag && !cond.empty()) { @@ -278,91 +252,6 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra } } -void genCondTable(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames) -{ - Opt &opts = o.opts; - const size_t conds = condnames.size (); - o.wind(ind++).ws("static void *").wstring(opts->yyctable).ws("[").wu64(conds).ws("] = {\n"); - for (size_t i = 0; i < conds; ++i) - { - o.wind(ind).ws("&&").wstring(opts->condPrefix).wstring(condnames[i]).ws(",\n"); - } - o.wind(--ind).ws("};\n"); -} - -void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax) -{ - Opt &opts = o.opts; - if (cMin == cMax) - { - o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n"); - } - else - { - uint32_t cMid = cMin + ((cMax - cMin + 1) / 2); - - o.wind(ind).ws("if (").wstring(genGetCondition(opts)).ws(" < ").wu32(cMid).ws(") {\n"); - genCondGotoSub(o, ind + 1, condnames, cMin, cMid - 1); - o.wind(ind).ws("} else {\n"); - genCondGotoSub(o, ind + 1, condnames, cMid, cMax); - o.wind(ind).ws("}\n"); - } -} - -/* - * note [condition order] - * - * In theory re2c makes no guarantee about the order of conditions in - * the generated
lexer. Users should define condition type 'YYCONDTYPE' - * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'. - * This way code is independent of internal re2c condition numbering. - * - * However, it is possible to manually hardcode condition numbers and make - * re2c generate condition dispatch without explicit use of condition names - * (nested 'if' statements with '-b' or computed 'goto' table with '-g'). - * This code is syntactically valid (compiles), but unsafe: - * - change of re2c options may break compilation - * - change of internal re2c condition numbering may break runtime - * - * re2c has to preserve the existing numbering scheme. - * - * re2c warns about implicit assumptions about condition order, unless: - * - condition type is defined with 'types:re2c' or '-t, --type-header' - * - dispatch is independent of condition order: either it uses - * explicit condition names or there's only one condition and - * dispatch shrinks to unconditional jump - */ -void genCondGoto(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames) -{ - Opt &opts = o.opts; - const size_t conds = condnames.size (); - if (opts->gFlag) - { - o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition(opts)).ws("];\n"); - } - else if (opts->sFlag) - { - if (conds == 1) - { - o.warn_condition_order = false; // see note [condition order] - } - genCondGotoSub(o, ind, condnames, 0, static_cast<uint32_t> (conds) - 1); - } - else - { - o.warn_condition_order = false; // see note [condition order] - o.wind(ind).ws("switch (").wstring(genGetCondition(opts)).ws(") {\n"); - for (size_t i = 0; i < conds; ++i) - { - const std::string & cond = condnames[i]; - o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(cond).ws(": goto ").wstring(opts->condPrefix).wstring(cond).ws(";\n"); - } - o.wind(ind).ws("}\n"); - } - o.wdelay_warn_condition_order (); - o.cond_goto = true; -} - std::string vartag_name(tagver_t ver, const std::string &prefix) {
std::ostringstream s; diff --git a/re2c/src/codegen/output.cc b/re2c/src/codegen/output.cc index e3c34a54..eebbebfa 100644 --- a/re2c/src/codegen/output.cc +++ b/re2c/src/codegen/output.cc @@ -207,6 +207,25 @@ OutputFile & OutputFile::wdelay_line_info () return *this; } +OutputFile & OutputFile::wdelay_cond_goto(uint32_t ind) +{ + if (opts->cFlag && !cond_goto) { + block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind)); + insert_code (); + cond_goto = true; + } + return *this; +} + +OutputFile & OutputFile::wdelay_cond_table(uint32_t ind) +{ + if (opts->gFlag && opts->cFlag && !cond_goto) { + block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind)); + insert_code (); + } + return *this; +} + OutputFile & OutputFile::wdelay_state_goto (uint32_t ind) { if (opts->fFlag && !state_goto) { @@ -225,13 +244,6 @@ OutputFile & OutputFile::wdelay_types () return *this; } -OutputFile & OutputFile::wdelay_warn_condition_order () -{ - block().fragments.push_back (new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0)); - insert_code (); - return *this; -} - OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind) { block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind)); @@ -295,6 +307,13 @@ bool OutputFile::emit(const uniq_vector_t &global_types, case OutputFragment::LINE_INFO: output_line_info(f.stream, line_count + 1, filename, opts); break; + case OutputFragment::COND_GOTO: + output_cond_goto(f.stream, f.indent, b.types, + opts, warn, warn_condition_order, b.line); + break; + case OutputFragment::COND_TABLE: + output_cond_table(f.stream, f.indent, b.types, opts); + break; case OutputFragment::STATE_GOTO: output_state_goto(f.stream, f.indent, 0, fill_index, opts); break; @@ -304,11 +323,6 @@ bool OutputFile::emit(const uniq_vector_t &global_types, case OutputFragment::TYPES: output_types(f.stream, f.indent, global_types, opts); break; - case OutputFragment::WARN_CONDITION_ORDER: - if 
(warn_condition_order) {// see note [condition order] - warn.condition_order (b.line); - } - break; case OutputFragment::YYACCEPT_INIT: output_yyaccept_init(f.stream, f.indent, b.used_yyaccept, opts); break; @@ -477,4 +491,100 @@ std::string output_get_state (Opt &opts) : opts->state_get + "()"; } +/* + * note [condition order] + * + * In theory re2c makes no guarantee about the order of conditions in + * the generated lexer. Users should define condition type 'YYCONDTYPE' + * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'. + * This way code is independent of internal re2c condition numbering. + * + * However, it is possible to manually hardcode condition numbers and make + * re2c generate condition dispatch without explicit use of condition names + * (nested 'if' statements with '-b' or computed 'goto' table with '-g'). + * This code is syntactically valid (compiles), but unsafe: + * - change of re2c options may break compilation + * - change of internal re2c condition numbering may break runtime + * + * re2c has to preserve the existing numbering scheme. + * + * re2c warns about implicit assumptions about condition order, unless: + * - condition type is defined with 'types:re2c' or '-t, --type-header' + * - dispatch is independent of condition order: either it uses + * explicit condition names or there's only one condition and + * dispatch shrinks to unconditional jump + */ + +static std::string output_cond_get(Opt &opts) +{ + return opts->cond_get + (opts->cond_get_naked ?
"" : "()"); +} + +static void output_cond_goto_binary(std::ostream &o, uint32_t ind, + const std::vector<std::string> &conds, Opt &opts, + size_t lower, size_t upper) +{ + const std::string indstr = indent(ind, opts->indString); + + if (lower == upper) { + o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n"; + } else { + const size_t middle = lower + (upper - lower + 1) / 2; + o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n"; + output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1); + o << indstr << "} else {\n"; + output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper); + o << indstr << "}\n"; + } +} + +void output_cond_goto(std::ostream &o, uint32_t ind, + const std::vector<std::string> &conds, Opt &opts, + Warn &warn, bool warn_cond_order, uint32_t line) +{ + const size_t ncond = conds.size(); + const std::string indstr = indent(ind, opts->indString); + + if (opts->target == opt_t::DOT) { + for (size_t i = 0; i < ncond; ++i) { + const std::string &cond = conds[i]; + o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n"; + } + return; + } + + if (opts->gFlag) { + o << indstr << "goto *" << opts->yyctable + << "[" << output_cond_get(opts) << "];\n"; + } else if (opts->sFlag) { + if (ncond == 1) warn_cond_order = false; + output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1); + } else { + warn_cond_order = false; + o << indstr << "switch (" << output_cond_get(opts) << ") {\n"; + for (size_t i = 0; i < ncond; ++i) { + const std::string &cond = conds[i]; + o << indstr << "case " << opts->condEnumPrefix << cond + <<": goto " << opts->condPrefix << cond << ";\n"; + } + o << indstr << "}\n"; + } + + // see note [condition order] + if (warn_cond_order) warn.condition_order(line); +} + +void output_cond_table(std::ostream &o, uint32_t ind, + const std::vector<std::string> &conds, Opt &opts) +{ + const size_t ncond = conds.size(); + const std::string indstr = opts->indString; + + o << indent(ind++, indstr) << "static void *" <<
opts->yyctable << "[" << ncond << "] = {\n"; + for (size_t i = 0; i < ncond; ++i) { + o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n"; + } + o << indent(--ind, indstr) << "};\n"; +} + } // namespace re2c diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h index 231c81e8..364c12b0 100644 --- a/re2c/src/codegen/output.h +++ b/re2c/src/codegen/output.h @@ -34,11 +34,12 @@ struct OutputFragment enum type_t { CODE // , CONFIG + , COND_GOTO + , COND_TABLE , LINE_INFO , STATE_GOTO , TAGS , TYPES - , WARN_CONDITION_ORDER , YYACCEPT_INIT , YYMAXFILL }; @@ -112,9 +113,10 @@ public: // delayed output OutputFile & wdelay_tags(uint32_t ind, const ConfTags *cf); OutputFile & wdelay_line_info (); + OutputFile & wdelay_cond_goto(uint32_t ind); + OutputFile & wdelay_cond_table(uint32_t ind); OutputFile & wdelay_state_goto (uint32_t ind); OutputFile & wdelay_types (); - OutputFile & wdelay_warn_condition_order (); OutputFile & wdelay_yyaccept_init (uint32_t ind); OutputFile & wdelay_yymaxfill (); @@ -150,6 +152,8 @@ struct Output void output_tags (std::ostream &o, const ConfTags &conf, const std::set &tags); void output_line_info (std::ostream &o, uint32_t ind, const std::string &file_name, Opt &opts); +void output_cond_goto (std::ostream &o, uint32_t ind, const std::vector &conds, Opt &opts, Warn &warn, bool warn_cond_order, uint32_t line); +void output_cond_table (std::ostream &o, uint32_t ind, const std::vector &conds, Opt &opts); void output_state_goto (std::ostream &o, uint32_t ind, uint32_t start_label, uint32_t fill_index, Opt &opts); void output_types (std::ostream &o, uint32_t ind, const uniq_vector_t &types, Opt &opts); void output_version_time (std::ostream &o, Opt &opts); diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index 7a447604..7cb61bf9 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -105,7 +105,7 @@ private: void hoist_tags(); void count_used_labels (std::set & used, label_t prolog, 
label_t start, bool force_start, bool fFlag) const; void emit_body (OutputFile &, uint32_t &, const std::set & used_labels, label_t initial) const; - void emit_dot(OutputFile &o, bool last_cond, const std::vector &conds) const; + void emit_dot(OutputFile &o, bool last_cond) const; FORBID_COPY (DFA); }; diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index f5e0da8c..8e1b990c 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -441,7 +441,6 @@ void parse(Scanner &input, Output & o) Enc encodingOld = opts->encoding; for (Scanner::ParseMode mode; (mode = input.echo()) != Scanner::Stop;) { o.source.new_block (); - bool bPrologBrace = false; input.save_state(curr_state); if (opts->rFlag && mode == Scanner::Rules && !dfas.empty()) @@ -497,19 +496,12 @@ void parse(Scanner &input, Output & o) } } - for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { - const std::string &c = (*i)->cond; - if (c != "") { - o.source.block().types.push_back(c); - } - } - // generate code if (mode != Scanner::Rules) { + bool prolog = false; uint32_t ind = opts->topIndent; - size_t nCount = dfas.size(); for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) { - (*i)->emit(o, ind, !--nCount, bPrologBrace); + (*i)->emit(o, ind, (i + 1) == dfas.end(), prolog); } }