granicus.if.org Git - re2c/commitdiff
Delay code generation for condition dispatch.
author Ulya Trofimovich <skvadrik@gmail.com>
Tue, 27 Dec 2016 21:04:09 +0000 (21:04 +0000)
committer Ulya Trofimovich <skvadrik@gmail.com>
Tue, 27 Dec 2016 21:04:09 +0000 (21:04 +0000)
re2c/bootstrap/src/parse/lex.cc
re2c/bootstrap/src/parse/parser.cc
re2c/src/codegen/emit_dfa.cc
re2c/src/codegen/output.cc
re2c/src/codegen/output.h
re2c/src/ir/adfa/adfa.h
re2c/src/parse/parser.ypp

index 2ba20d90c06b0646235420e69ffe27437c8a0960..79a043a844ebd2354de70555350b0ec776ed46f8 100644 (file)
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.16 on Tue Dec 27 16:57:27 2016 */
+/* Generated by re2c 0.16 on Tue Dec 27 20:54:24 2016 */
 #line 1 "../src/parse/lex.re"
 #include "src/util/c99_stdint.h"
 #include <stddef.h>
index db5eee846a378df398a9376c3bc79394674c3835..45e1cb941dbfe9140f7c0fd473a7eb7a1d4ca494 100644 (file)
@@ -2066,7 +2066,6 @@ void parse(Scanner &input, Output & o)
        Enc encodingOld = opts->encoding;
        for (Scanner::ParseMode mode; (mode = input.echo()) != Scanner::Stop;) {
                o.source.new_block ();
-               bool bPrologBrace = false;
 
                input.save_state(curr_state);
                if (opts->rFlag && mode == Scanner::Rules && !dfas.empty())
@@ -2122,19 +2121,12 @@ void parse(Scanner &input, Output & o)
                        }
                }
 
-               for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
-                       const std::string &c = (*i)->cond;
-                       if (c != "") {
-                               o.source.block().types.push_back(c);
-                       }
-               }
-
                // generate code
                if (mode != Scanner::Rules) {
+                       bool prolog = false;
                        uint32_t ind = opts->topIndent;
-                       size_t nCount = dfas.size();
                        for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
-                               (*i)->emit(o, ind, !--nCount, bPrologBrace);
+                               (*i)->emit(o, ind, (i + 1) == dfas.end(), prolog);
                        }
                }
 
index 5f308c372c5136036d9e80bd8f70597eb9ebaaac..11adcff9c55bd69937611e758ab24f9794fa9ce9 100644 (file)
 namespace re2c
 {
 
-static std::string genGetCondition (Opt &opts);
-static void genCondGotoSub (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax);
-static void genCondTable   (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
-static void genCondGoto    (OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames);
-static void emit_state     (OutputFile & o, uint32_t ind, const State * s, bool used_label);
-
-std::string genGetCondition(Opt &opts)
-{
-       return opts->cond_get_naked
-               ? opts->cond_get
-               : opts->cond_get + "()";
-}
+static void emit_state(OutputFile & o, uint32_t ind, const State * s, bool used_label);
 
 void emit_state (OutputFile & o, uint32_t ind, const State * s, bool used_label)
 {
@@ -91,25 +80,14 @@ void DFA::emit_body(OutputFile &o, uint32_t& ind,
        }
 }
 
-void DFA::emit_dot(
-       OutputFile &o,
-       bool last_cond,
-       const std::vector<std::string> &conds) const
+void DFA::emit_dot(OutputFile &o, bool last_cond) const
 {
        Opt &opts = o.opts;
        if (!opts->cFlag || !o.cond_goto) {
                o.ws("digraph re2c {\n");
        }
+       o.wdelay_cond_goto(0);
        if (opts->cFlag) {
-               if (!o.cond_goto) {
-                       for (size_t i = 0; i < conds.size(); ++i) {
-                               const std::string &cond = conds[i];
-                               o.ws("0 -> ").wstring(cond)
-                                       .ws(" [label=\"state=")
-                                       .wstring(cond).ws("\"]\n");
-                       }
-                       o.cond_goto = true;
-               }
                o.wstring(cond).ws(" -> ").wlabel(head->label).ws("\n");
        }
        for (State *s = head; s; s = s->next) {
@@ -142,6 +120,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
        OutputFile &o = output.source;
        OutputBlock &ob = o.block();
        Opt &opts = o.opts;
+
        std::set<std::string> tagnames, tagvars;
        if (!oldstyle_ctxmarker) {
                for (size_t i = 0; i < vartags.size(); ++i) {
@@ -161,6 +140,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
                }
                ob.tags.insert(tagnames.begin(), tagnames.end());
        }
+       if (!cond.empty()) o.block().types.push_back(cond);
 
        bool bProlog = (!opts->cFlag || !o.cond_goto);
 
@@ -192,7 +172,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
                        emit_end(o, name, need_backup, oldstyle_ctxmarker);
                }
        } else if (opts->target == opt_t::DOT) {
-               emit_dot(o, isLastCond, ob.types);
+               emit_dot(o, isLastCond);
        } else {
                // Generate prolog
                if (bProlog)
@@ -231,10 +211,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
                }
                if (bProlog)
                {
-                       if (opts->cFlag && !o.cond_goto && opts->gFlag)
-                       {
-                               genCondTable(o, ind, ob.types);
-                       }
+                       o.wdelay_cond_table(ind);
                        o.wdelay_state_goto (ind);
                        if (opts->cFlag)
                        {
@@ -244,10 +221,7 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
                                }
                        }
                        o.wuser_start_label ();
-                       if (opts->cFlag && !o.cond_goto)
-                       {
-                               genCondGoto(o, ind, ob.types);
-                       }
+                       o.wdelay_cond_goto(ind);
                }
                if (opts->cFlag && !cond.empty())
                {
@@ -278,91 +252,6 @@ void DFA::emit(Output & output, uint32_t& ind, bool isLastCond, bool& bPrologBra
        }
 }
 
-void genCondTable(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
-{
-       Opt &opts = o.opts;
-       const size_t conds = condnames.size ();
-       o.wind(ind++).ws("static void *").wstring(opts->yyctable).ws("[").wu64(conds).ws("] = {\n");
-       for (size_t i = 0; i < conds; ++i)
-       {
-               o.wind(ind).ws("&&").wstring(opts->condPrefix).wstring(condnames[i]).ws(",\n");
-       }
-       o.wind(--ind).ws("};\n");
-}
-
-void genCondGotoSub(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames, uint32_t cMin, uint32_t cMax)
-{
-       Opt &opts = o.opts;
-       if (cMin == cMax)
-       {
-               o.wind(ind).ws("goto ").wstring(opts->condPrefix).wstring(condnames[cMin]).ws(";\n");
-       }
-       else
-       {
-               uint32_t cMid = cMin + ((cMax - cMin + 1) / 2);
-
-               o.wind(ind).ws("if (").wstring(genGetCondition(opts)).ws(" < ").wu32(cMid).ws(") {\n");
-               genCondGotoSub(o, ind + 1, condnames, cMin, cMid - 1);
-               o.wind(ind).ws("} else {\n");
-               genCondGotoSub(o, ind + 1, condnames, cMid, cMax);
-               o.wind(ind).ws("}\n");
-       }
-}
-
-/*
- * note [condition order]
- *
- * In theory re2c makes no guarantee about the order of conditions in
- * the generated lexer. Users should define condition type 'YYCONDTYPE'
- * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'.
- * This way code is independent of internal re2c condition numbering.
- *
- * However, it is possible to manually hardcode condition numbers and make
- * re2c generate condition dispatch without explicit use of condition names
- * (nested 'if' statements with '-b' or computed 'goto' table with '-g').
- * This code is syntactically valid (compiles), but unsafe:
- *     - change of re2c options may break compilation
- *     - change of internal re2c condition numbering may break runtime
- *
- * re2c has to preserve the existing numbering scheme.
- *
- * re2c warns about implicit assumptions about condition order, unless:
- *     - condition type is defined with 'types:re2c' or '-t, --type-header'
- *     - dispatch is independent of condition order: either it uses
- *       explicit condition names or there's only one condition and
- *       dispatch shrinks to unconditional jump
- */
-void genCondGoto(OutputFile & o, uint32_t ind, const std::vector<std::string> & condnames)
-{
-       Opt &opts = o.opts;
-       const size_t conds = condnames.size ();
-       if (opts->gFlag)
-       {
-               o.wind(ind).ws("goto *").wstring(opts->yyctable).ws("[").wstring(genGetCondition(opts)).ws("];\n");
-       }
-       else if (opts->sFlag)
-       {
-               if (conds == 1)
-               {
-                       o.warn_condition_order = false; // see note [condition order]
-               }
-               genCondGotoSub(o, ind, condnames, 0, static_cast<uint32_t> (conds) - 1);
-       }
-       else
-       {
-               o.warn_condition_order = false; // see note [condition order]
-               o.wind(ind).ws("switch (").wstring(genGetCondition(opts)).ws(") {\n");
-               for (size_t i = 0; i < conds; ++i)
-               {
-                       const std::string & cond = condnames[i];
-                       o.wind(ind).ws("case ").wstring(opts->condEnumPrefix).wstring(cond).ws(": goto ").wstring(opts->condPrefix).wstring(cond).ws(";\n");
-               }
-               o.wind(ind).ws("}\n");
-       }
-       o.wdelay_warn_condition_order ();
-       o.cond_goto = true;
-}
-
 std::string vartag_name(tagver_t ver, const std::string &prefix)
 {
        std::ostringstream s;
index e3c34a541e1f62bc233aa15a81e6bf059450ab1b..eebbebfa8b05cfb74b1d5ef907b32c74470fe99b 100644 (file)
@@ -207,6 +207,25 @@ OutputFile & OutputFile::wdelay_line_info ()
        return *this;
 }
 
+OutputFile & OutputFile::wdelay_cond_goto(uint32_t ind) // Queue a delayed COND_GOTO fragment (condition dispatch) at indent 'ind'; returns *this.
+{
+       if (opts->cFlag && !cond_goto) { // only when cFlag is set and no dispatch has been queued on this file yet
+               block().fragments.push_back(new OutputFragment(OutputFragment::COND_GOTO, ind)); // placeholder; OutputFile::emit later renders it via output_cond_goto() using the block's types
+               insert_code (); // NOTE(review): presumably opens a fresh CODE fragment after the placeholder -- confirm
+               cond_goto = true; // makes later wdelay_cond_goto()/wdelay_cond_table() calls no-ops
+       }
+       return *this; // returned so that calls can be chained
+}
+
+OutputFile & OutputFile::wdelay_cond_table(uint32_t ind) // Queue a delayed COND_TABLE fragment (condition label table) at indent 'ind'; returns *this.
+{
+       if (opts->gFlag && opts->cFlag && !cond_goto) { // needs both gFlag and cFlag; skipped once cond_goto is set, so it must run before wdelay_cond_goto()
+               block().fragments.push_back(new OutputFragment(OutputFragment::COND_TABLE, ind)); // placeholder; OutputFile::emit later renders it via output_cond_table()
+               insert_code (); // NOTE(review): presumably opens a fresh CODE fragment after the placeholder -- confirm
+       } // cond_goto is deliberately left untouched here; only wdelay_cond_goto() sets it
+       return *this; // returned so that calls can be chained
+}
+
 OutputFile & OutputFile::wdelay_state_goto (uint32_t ind)
 {
        if (opts->fFlag && !state_goto) {
@@ -225,13 +244,6 @@ OutputFile & OutputFile::wdelay_types ()
        return *this;
 }
 
-OutputFile & OutputFile::wdelay_warn_condition_order ()
-{
-       block().fragments.push_back (new OutputFragment (OutputFragment::WARN_CONDITION_ORDER, 0));
-       insert_code ();
-       return *this;
-}
-
 OutputFile & OutputFile::wdelay_yyaccept_init (uint32_t ind)
 {
        block().fragments.push_back (new OutputFragment (OutputFragment::YYACCEPT_INIT, ind));
@@ -295,6 +307,13 @@ bool OutputFile::emit(const uniq_vector_t<std::string> &global_types,
                                case OutputFragment::LINE_INFO:
                                        output_line_info(f.stream, line_count + 1, filename, opts);
                                        break;
+                               case OutputFragment::COND_GOTO:
+                                       output_cond_goto(f.stream, f.indent, b.types,
+                                               opts, warn, warn_condition_order, b.line);
+                                       break;
+                               case OutputFragment::COND_TABLE:
+                                       output_cond_table(f.stream, f.indent, b.types, opts);
+                                       break;
                                case OutputFragment::STATE_GOTO:
                                        output_state_goto(f.stream, f.indent, 0, fill_index, opts);
                                        break;
@@ -304,11 +323,6 @@ bool OutputFile::emit(const uniq_vector_t<std::string> &global_types,
                                case OutputFragment::TYPES:
                                        output_types(f.stream, f.indent, global_types, opts);
                                        break;
-                               case OutputFragment::WARN_CONDITION_ORDER:
-                                       if (warn_condition_order) {// see note [condition order]
-                                               warn.condition_order (b.line);
-                                       }
-                                       break;
                                case OutputFragment::YYACCEPT_INIT:
                                        output_yyaccept_init(f.stream, f.indent, b.used_yyaccept, opts);
                                        break;
@@ -477,4 +491,100 @@ std::string output_get_state (Opt &opts)
                : opts->state_get + "()";
 }
 
+/*
+ * note [condition order]
+ *
+ * In theory re2c makes no guarantee about the order of conditions in
+ * the generated lexer. Users should define condition type 'YYCONDTYPE'
+ * and use values of this type with 'YYGETCONDITION' and 'YYSETCONDITION'.
+ * This way code is independent of internal re2c condition numbering.
+ *
+ * However, it is possible to manually hardcode condition numbers and make
+ * re2c generate condition dispatch without explicit use of condition names
+ * (nested 'if' statements with '-b' or computed 'goto' table with '-g').
+ * This code is syntactically valid (compiles), but unsafe:
+ *     - change of re2c options may break compilation
+ *     - change of internal re2c condition numbering may break runtime
+ *
+ * re2c has to preserve the existing numbering scheme.
+ *
+ * re2c warns about implicit assumptions about condition order, unless:
+ *     - condition type is defined with 'types:re2c' or '-t, --type-header'
+ *     - dispatch is independent of condition order: either it uses
+ *       explicit condition names or there's only one condition and
+ *       dispatch shrinks to unconditional jump
+ */
+
+static std::string output_cond_get(Opt &opts) // Build the text of the expression that reads the current condition.
+{
+       return opts->cond_get + (opts->cond_get_naked ? "" : "()"); // cond_get as is when cond_get_naked is set, otherwise with "()" appended
+}
+
+static void output_cond_goto_binary(std::ostream &o, uint32_t ind, // Write nested if/else that bisects conds[lower..upper] and ends in a single goto.
+       const std::vector<std::string> &conds, Opt &opts, // conds: condition names, indexed by the number compared against the condition value
+       size_t lower, size_t upper) // inclusive index bounds into conds
+{
+       const std::string indstr = indent(ind, opts->indString); // indentation string for this nesting level
+
+       if (lower == upper) { // one candidate left: jump to its label
+               o << indstr << "goto " << opts->condPrefix << conds[lower] << ";\n";
+       } else {
+               const size_t middle = lower + (upper - lower + 1) / 2; // midpoint rounded up: left half is [lower, middle-1], right half is [middle, upper]
+               o << indstr << "if (" << output_cond_get(opts) << " < " << middle << ") {\n"; // compares against a numeric index, see note [condition order]
+               output_cond_goto_binary(o, ind + 1, conds, opts, lower, middle - 1); // left half, one level deeper
+               o << indstr << "} else {\n";
+               output_cond_goto_binary(o, ind + 1, conds, opts, middle, upper); // right half, one level deeper
+               o << indstr << "}\n";
+       }
+}
+
+void output_cond_goto(std::ostream &o, uint32_t ind, // Write the condition dispatch for 'conds' to 'o' at indent 'ind'.
+       const std::vector<std::string> &conds, Opt &opts, // conds: condition names of the block; opts selects the dispatch form
+       Warn &warn, bool warn_cond_order, uint32_t line) // warn_cond_order is a by-value copy; 'line' is passed to the warning
+{
+       const size_t ncond = conds.size();
+       const std::string indstr = indent(ind, opts->indString); // indentation string for level 'ind'
+
+       if (opts->target == opt_t::DOT) { // DOT target: one edge from node 0 to each condition, then done
+               for (size_t i = 0; i < ncond; ++i) {
+                       const std::string &cond = conds[i];
+                       o << "0 -> " << cond << " [label=\"state=" << cond << "\"]\n";
+               }
+               return; // no order warning is issued on this path
+       }
+
+       if (opts->gFlag) { // gFlag: indexed jump through the yyctable array (written by output_cond_table)
+               o << indstr << "goto *" << opts->yyctable
+                       << "[" << output_cond_get(opts) << "];\n";
+       } else if (opts->sFlag) { // sFlag: nested if/else comparing against condition indices
+               if (ncond == 1) warn_cond_order = false; // a single condition gives an unconditional goto, independent of order
+               output_cond_goto_binary(o, ind, conds, opts, 0, ncond - 1); // NOTE(review): ncond - 1 wraps around if conds is empty -- confirm callers guarantee at least one condition
+       } else { // default: switch over explicit condition names
+               warn_cond_order = false; // case labels use names, so dispatch does not depend on numbering
+               o << indstr << "switch (" << output_cond_get(opts) << ") {\n";
+               for (size_t i = 0; i < ncond; ++i) {
+                       const std::string &cond = conds[i];
+                       o << indstr << "case " << opts->condEnumPrefix << cond
+                               <<": goto " << opts->condPrefix << cond << ";\n";
+               }
+               o << indstr << "}\n";
+       }
+
+       // see note [condition order]
+       if (warn_cond_order) warn.condition_order(line); // warn only if the flag survived the branches above
+}
+
+void output_cond_table(std::ostream &o, uint32_t ind, // Write the static yyctable array holding one "&&label" entry per condition.
+       const std::vector<std::string> &conds, Opt &opts) // conds: condition names, written in vector order
+{
+       const size_t ncond = conds.size(); // becomes the declared array size
+       const std::string indstr = opts->indString; // here indstr is the raw indent unit, expanded per line by indent()
+
+       o << indent(ind++, indstr) << "static void *" << opts->yyctable << "[" << ncond << "] = {\n"; // header at 'ind', then entries one level deeper
+       for (size_t i = 0; i < ncond; ++i) {
+               o << indent(ind, indstr) << "&&" << opts->condPrefix << conds[i] << ",\n"; // label is condPrefix + condition name
+       }
+       o << indent(--ind, indstr) << "};\n"; // closing brace back at the original indent
+}
+
 } // namespace re2c
index 231c81e85665c79399008023cce093e536eb4cdf..364c12b0d8a5934cc058b0de41bc9dde7c3c162c 100644 (file)
@@ -34,11 +34,12 @@ struct OutputFragment
        enum type_t
                { CODE
 //             , CONFIG
+               , COND_GOTO
+               , COND_TABLE
                , LINE_INFO
                , STATE_GOTO
                , TAGS
                , TYPES
-               , WARN_CONDITION_ORDER
                , YYACCEPT_INIT
                , YYMAXFILL
                };
@@ -112,9 +113,10 @@ public:
        // delayed output
        OutputFile & wdelay_tags(uint32_t ind, const ConfTags *cf);
        OutputFile & wdelay_line_info ();
+       OutputFile & wdelay_cond_goto(uint32_t ind);
+       OutputFile & wdelay_cond_table(uint32_t ind);
        OutputFile & wdelay_state_goto (uint32_t ind);
        OutputFile & wdelay_types ();
-       OutputFile & wdelay_warn_condition_order ();
        OutputFile & wdelay_yyaccept_init (uint32_t ind);
        OutputFile & wdelay_yymaxfill ();
 
@@ -150,6 +152,8 @@ struct Output
 
 void output_tags          (std::ostream &o, const ConfTags &conf, const std::set<std::string> &tags);
 void output_line_info     (std::ostream &o, uint32_t ind, const std::string &file_name, Opt &opts);
+void output_cond_goto     (std::ostream &o, uint32_t ind, const std::vector<std::string> &conds, Opt &opts, Warn &warn, bool warn_cond_order, uint32_t line);
+void output_cond_table    (std::ostream &o, uint32_t ind, const std::vector<std::string> &conds, Opt &opts);
 void output_state_goto    (std::ostream &o, uint32_t ind, uint32_t start_label, uint32_t fill_index, Opt &opts);
 void output_types         (std::ostream &o, uint32_t ind, const uniq_vector_t<std::string> &types, Opt &opts);
 void output_version_time  (std::ostream &o, Opt &opts);
index 7a447604195af8d0934ba27b4b78908a5304ab00..7cb61bf91966dee964127485c22a923f7d4c08b3 100644 (file)
@@ -105,7 +105,7 @@ private:
        void hoist_tags();
        void count_used_labels (std::set<label_t> & used, label_t prolog, label_t start, bool force_start, bool fFlag) const;
        void emit_body (OutputFile &, uint32_t &, const std::set<label_t> & used_labels, label_t initial) const;
-       void emit_dot(OutputFile &o, bool last_cond, const std::vector<std::string> &conds) const;
+       void emit_dot(OutputFile &o, bool last_cond) const;
 
        FORBID_COPY (DFA);
 };
index f5e0da8c890d5158580269e56d9bf120e6204b1a..8e1b990c0f096828632c1bfdc0169e2c1c17d5a8 100644 (file)
@@ -441,7 +441,6 @@ void parse(Scanner &input, Output & o)
        Enc encodingOld = opts->encoding;
        for (Scanner::ParseMode mode; (mode = input.echo()) != Scanner::Stop;) {
                o.source.new_block ();
-               bool bPrologBrace = false;
 
                input.save_state(curr_state);
                if (opts->rFlag && mode == Scanner::Rules && !dfas.empty())
@@ -497,19 +496,12 @@ void parse(Scanner &input, Output & o)
                        }
                }
 
-               for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
-                       const std::string &c = (*i)->cond;
-                       if (c != "") {
-                               o.source.block().types.push_back(c);
-                       }
-               }
-
                // generate code
                if (mode != Scanner::Rules) {
+                       bool prolog = false;
                        uint32_t ind = opts->topIndent;
-                       size_t nCount = dfas.size();
                        for (dfas_t::const_iterator i = dfas.begin(); i != dfas.end(); ++i) {
-                               (*i)->emit(o, ind, !--nCount, bPrologBrace);
+                               (*i)->emit(o, ind, (i + 1) == dfas.end(), prolog);
                        }
                }