From fa88d997f2780d0a5212f36b175816223650db6c Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 17 Mar 2015 16:00:52 +0000 Subject: [PATCH] Split control flow codegen in two phases: - First, re2c builds a complex structure where it stores all control flow codegen decisions: nested ifs or switches, bitmaps or computed gotos, etc. - Second, this structure is traversed and code is generated. This differentiation is necessary to compute some statistics (e.g. used labels) in advance, before code generation. --- re2c/Makefile.am | 4 +- re2c/cases.cc | 52 ----- re2c/cases.h | 34 ---- re2c/code.cc | 344 +-------------------------------- re2c/go.cc | 493 +++++++++++++++++++++++++++++++++++++++++++++-- re2c/go.h | 138 ++++++++++++- 6 files changed, 615 insertions(+), 450 deletions(-) delete mode 100644 re2c/cases.cc delete mode 100644 re2c/cases.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index b7ade844..a72132d3 100755 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -2,10 +2,10 @@ bin_PROGRAMS = re2c win_BINARIES = $(WINBUILDDIR)/re2c.exe -re2c_SOURCES = cases.cc code.cc dfa.cc go.cc main.cc parser.cc actions.cc scanner.re substr.cc range.cc \ +re2c_SOURCES = code.cc dfa.cc go.cc main.cc parser.cc actions.cc scanner.re substr.cc range.cc \ translate.cc scanner.cc mbo_getopt.cc print.cc input.cc input_api.cc output.cc \ enc.cc utf8.cc utf8_range.cc utf8_regexp.cc utf16.cc utf16_range.cc utf16_regexp.cc range_suffix.cc \ - basics.h cases.h code.h code_names.h dfa.h go.h enc.h indent.h input.h input_api.h free_list.h globals.h ins.h \ + basics.h code.h code_names.h dfa.h go.h enc.h indent.h input.h input_api.h free_list.h globals.h ins.h \ mbo_getopt.h parser.h print.h range.h range_suffix.h re.h \ scanner.h smart_ptr.h substr.h token.h output.h \ utf16.h utf16_range.h utf16_regexp.h utf8.h utf8_range.h utf8_regexp.h diff --git a/re2c/cases.cc b/re2c/cases.cc deleted file mode 100644 index 294f1ace..00000000 --- a/re2c/cases.cc +++ /dev/null @@ -1,52 +0,0 @@ 
-#include "cases.h" - -namespace re2c { - -Cases::Cases (const Span * span, uint span_size) - : def (span_size == 0 ? NULL : span[span_size - 1].to) - , cases (new Case[span_size]) - , cases_size (0) -{ - for (uint i = 0, lb = 0; i < span_size; ++ i) - { - add (lb, span[i].ub, span[i].to); - lb = span[i].ub; - } -} - -void Cases::add (uint lb, uint ub, State * to) -{ - for (uint i = 0; i < cases_size; ++i) - { - if (cases[i].to == to) - { - cases[i].ranges.push_back (std::make_pair (lb, ub)); - return; - } - } - cases[cases_size].ranges.push_back (std::make_pair (lb, ub)); - cases[cases_size].to = to; - ++cases_size; -} - -uint Cases::size () const -{ - return cases_size; -} - -State * Cases::default_state () const -{ - return def; -} - -const Case & Cases::operator [] (uint i) const -{ - return cases[i]; -} - -Cases::~Cases () -{ - delete [] cases; -} - -} // namespace re2c diff --git a/re2c/cases.h b/re2c/cases.h deleted file mode 100644 index d0d6a0ca..00000000 --- a/re2c/cases.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _cases_h -#define _cases_h - -#include - -#include "dfa.h" - -namespace re2c { - -struct Case -{ - std::vector > ranges; - State * to; -}; - -class Cases -{ - State * def; - Case * cases; - uint cases_size; - - void add (uint lb, uint ub, State * to); - -public: - Cases (const Span * s, uint n); - ~Cases (); - uint size () const; - State * default_state () const; - const Case & operator [] (uint i) const; -}; - -} //namespace re2c - -#endif // _cases_h diff --git a/re2c/code.cc b/re2c/code.cc index 986cead5..9d4e633d 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -10,7 +10,6 @@ #include #include -#include "cases.h" #include "code.h" #include "globals.h" #include "go.h" @@ -102,42 +101,6 @@ static void genSetCondition(OutputFile & o, uint ind, const std::string& newcond } } -static std::string space(uint this_label) -{ - int nl = next_label > 999999 ? 6 : next_label > 99999 ? 5 : next_label > 9999 ? 4 : next_label > 999 ? 3 : next_label > 99 ? 
2 : next_label > 9 ? 1 : 0; - int tl = this_label > 999999 ? 6 : this_label > 99999 ? 5 : this_label > 9999 ? 4 : this_label > 999 ? 3 : this_label > 99 ? 2 : this_label > 9 ? 1 : 0; - - return std::string(std::max(1, nl - tl + 1), ' '); -} - -/* - * Find all spans, that map to the given state. For each of them, - * find upper adjacent span, that maps to another state (if such - * span exists, otherwize try lower one). - * If input contains single span that maps to the given state, - * then output contains 0 spans. - */ -static void unmap (Go & go, const Go & base, const State * x) -{ - go.nSpans = 0; - for (uint i = 0; i < base.nSpans; ++i) - { - if (base.span[i].to != x) - { - if (go.nSpans > 0 && go.span[go.nSpans - 1].to == base.span[i].to) - go.span[go.nSpans - 1].ub = base.span[i].ub; - else - { - go.span[go.nSpans].to = base.span[i].to; - go.span[go.nSpans].ub = base.span[i].ub; - ++go.nSpans; - } - } - } - if (go.nSpans > 0) - go.span[go.nSpans - 1].ub = base.span[base.nSpans - 1].ub; -} - static void doGen(const Go *g, const State *s, uint *bm, uint f, uint m) { Span *b = g->span, *e = &b[g->nSpans]; @@ -178,7 +141,7 @@ const BitMap *BitMap::find(const Go *g, const State *x) { for (const BitMap *b = first; b; b = b->next) { - if (matches(b->go, b->on, g, x)) + if (matches(b->go->span, b->go->nSpans, b->on, g->span, g->nSpans, x)) { return b; } @@ -276,24 +239,6 @@ static void genGoTo(OutputFile & o, uint ind, const State *from, const State *to vUsedLabels.insert(to->label); } -static void genIf(OutputFile & o, uint ind, const char *cmp, uint v, bool &readCh) -{ - o << indent(ind) << "if ("; - if (readCh) - { - o << "(" << input_api.expr_peek_save () << ")"; - readCh = false; - } - else - { - o << mapCodeName["yych"]; - } - - o << " " << cmp << " "; - o.write_char_hex (v); - o << ") "; -} - static void need(OutputFile & o, uint ind, uint n, bool & readCh, bool bSetMarker) { if (DFlag) @@ -665,279 +610,6 @@ void Rule::emit(Output & output, uint ind, bool &, 
const std::string& condName) o.insert_line_info (); } -static void doLinear(OutputFile & o, uint ind, Span * s, uint n, const State *from, const State *next, bool &readCh) -{ - for (;;) - { - State *bg = s[0].to; - - while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) - { - if (s[1].to == next && n == 3) - { - genIf(o, ind, "!=", s[0].ub, readCh); - genGoTo(o, 0, from, bg, readCh); - return ; - } - else - { - genIf(o, ind, "==", s[0].ub, readCh); - genGoTo(o, 0, from, s[1].to, readCh); - } - - n -= 2; - s += 2; - } - - if (n == 1) - { - if (s[0].to->label != from->label + 1) - { - genGoTo(o, ind, from, s[0].to, readCh); - } - return ; - } - else if (n == 2 && bg == next) - { - genIf(o, ind, ">=", s[0].ub, readCh); - genGoTo(o, 0, from, s[1].to, readCh); - return ; - } - else - { - genIf(o, ind, "<=", s[0].ub - 1, readCh); - genGoTo(o, 0, from, bg, readCh); - n -= 1; - s += 1; - } - } -} - -static void genCases (OutputFile & o, uint ind, const std::vector > & ranges) -{ - for (uint i = 0; i < ranges.size (); ++i) - { - for (uint b = ranges[i].first; b < ranges[i].second; ++b) - { - o << indent(ind) << "case "; - o.write_char_hex (b); - o << ":"; - if (dFlag && encoding.is(Enc::EBCDIC)) - { - const uint c = encoding.decodeUnsafe(b); - if (isprint(c)) - o << " /* " << std::string(1, c) << " */"; - } - bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; - if (!last_case) - { - o << "\n"; - } - } - } -} - -static void genSwitchD (OutputFile & o, const State *from, Span * sp, uint nsp) -{ - if (nsp == 1) - { - o << from->label << " -> " << sp[0].to->label << "\n"; - } - else if (nsp > 1) - { - Cases cases (sp, nsp); - for (uint i = 0; i < cases.size (); ++i) - { - o << from->label << " -> " << cases[i].to->label << " [label=\""; - for (uint j = 0; j < cases[i].ranges.size (); ++j) - { - o.write_range (cases[i].ranges[j].first, cases[i].ranges[j].second); - } - o << "\"]\n"; - } - } -} - -static std::string output_yych (bool & readCh) -{ - if 
(readCh) - { - readCh = false; - return "(" + input_api.expr_peek_save () + ")"; - } - else - { - return mapCodeName["yych"]; - } -} - -static void genSwitch(OutputFile & o, uint ind, const State *from, const State *next, bool &readCh, Span * sp, uint nsp) -{ - if (nsp <= 2) - { - doLinear(o, ind, sp, nsp, from, next, readCh); - } - else - { - o << indent(ind) << "switch (" << output_yych (readCh) << ") {\n"; - - Cases cases (sp, nsp); - for (uint i = 0; i < cases.size (); ++i) - { - if (cases[i].to != cases.default_state ()) - { - genCases (o, ind, cases[i].ranges); - genGoTo(o, 1, from, cases[i].to, readCh); - } - } - - o << indent(ind) << "default:"; - genGoTo(o, 1, from, cases.default_state (), readCh); - o << indent(ind) << "}\n"; - } -} - -static void doBinary(OutputFile & o, uint ind, Span * s, uint n, const State *from, const State *next, bool &readCh) -{ - if (n <= 4) - { - doLinear(o, ind, s, n, from, next, readCh); - } - else - { - uint h = n / 2; - - genIf(o, ind, "<=", s[h - 1].ub - 1, readCh); - o << "{\n"; - doBinary(o, ind+1, &s[0], h, from, next, readCh); - o << indent(ind) << "} else {\n"; - doBinary(o, ind+1, &s[h], n - h, from, next, readCh); - o << indent(ind) << "}\n"; - } -} - -static void genBase(OutputFile & o, uint ind, const State *from, const State *next, bool &readCh, Span * sp, uint nsp) -{ - if (nsp == 0) - { - return; - } - else if (!sFlag || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) - { - genSwitch(o, ind, from, next, readCh, sp, nsp); - } - else if (nsp > 5) - { - doBinary(o, ind, sp, nsp, from, next, readCh); - } - else - { - doLinear(o, ind, sp, nsp, from, next, readCh); - } -} - -static std::string genGotoProlog (OutputFile & o, uint ind, const State *from, const State *next, bool &readCh, Span * sp, uint nsp) -{ - std::string sYych = output_yych (readCh); - - if (nsp > 0) - { - o << indent(ind) << "if (" << sYych <<" & ~0xFF) {\n"; - genBase(o, ind + 1, from, next, readCh, sp, nsp); - o << indent(ind) << "} 
else "; - sYych = mapCodeName["yych"]; - } - else - { - o << indent(ind); - } - - return sYych; -} - -static void genCpGoto (OutputFile & o, uint ind, const State *from, const State *next, bool &readCh, Span * sp, uint nsp, Span * hsp, uint nhsp) -{ - const std::string sYych = genGotoProlog(o, ind, from, next, readCh, hsp, nhsp); - o << "{\n"; - ++ind; - o << indent(ind++) << "static void *" << mapCodeName["yytarget"] << "[256] = {\n"; - o << indent(ind); - - uint ch = 0; - for (uint i = 0; i < nsp; ++i) - { - vUsedLabels.insert(sp[i].to->label); - for(; ch < sp[i].ub; ++ch) - { - o << "&&" << labelPrefix << sp[i].to->label; - if (ch == 255) - { - o << "\n"; - i = nsp; - break; - } - else if (ch % 8 == 7) - { - o << ",\n" << indent(ind); - } - else - { - o << "," << space(sp[i].to->label); - } - } - } - o << indent(--ind) << "};\n"; - o << indent(ind) << "goto *" << mapCodeName["yytarget"] << "[" << sYych << "];\n"; - o << indent(--ind) << "}\n"; -} - -static void genGoto (OutputFile & o, uint ind, const Go & go, const State *from, const State *next, bool &readCh) -{ - if (DFlag) - { - genSwitchD (o, from, go.span, go.nSpans); - return; - } - - const uint dSpans = go.nSpans - go.hSpans - go.nBitmaps; - if (gFlag && (dSpans >= cGotoThreshold)) - { - genCpGoto(o, ind, from, next, readCh, go.span, go.nSpans, go.hspan, go.hSpans); - return; - } - else if (bFlag) - { - for (uint i = 0; i < go.nSpans; ++i) - { - if (const BitMap * b = go.bitmaps[i]) - { - Go go1; - go1.span = new Span[go.nSpans]; - unmap (go1, go, go.span[i].to); - const std::string sYych = genGotoProlog(o, ind, from, next, readCh, go.hspan, go.hSpans); - bUsedYYBitmap = true; - o << "if (" << mapCodeName["yybm"] << "[" << b->i << "+" << sYych << "] & "; - if (yybmHexTable) - { - o.write_hex (b->m); - } - else - { - o << (uint) b->m; - } - o << ") {\n"; - genGoTo(o, ind+1, from, go.span[i].to, readCh); - o << indent(ind) << "}\n"; - genBase(o, ind, from, next, readCh, go1.span, go1.nSpans); - delete [] 
go1.span; - return ; - } - } - } - - genBase(o, ind, from, next, readCh, go.span, go.nSpans); -} - void State::emit(Output & output, uint ind, bool &readCh, const std::string& condName) const { OutputFile & o = output.source; @@ -1402,11 +1074,6 @@ void DFA::prepare(uint & max_fill) // find ``base'' state, if possible findBaseState(); - for (s = head; s; s = s->next) - { - s->go.init (); - } - delete head->action; head->action = NULL; } @@ -1448,6 +1115,11 @@ void DFA::emit(Output & output, uint& ind, const RegExpMap* specMap, const std:: s->label = next_label++; } + for (s = head; s; s = s->next) + { + s->go.init (s, s->next); + } + /* for (State * s = head; s; s = s->next) { @@ -1474,7 +1146,7 @@ std::cerr << "\t" << s->go.span[i].to->label << " " << s->next->label << std::en { bool readCh = false; s->emit(null_dev, ind, readCh, condName); - genGoto(null_dev.source, ind, s->go, s, s->next, readCh); + s->go.emit(null_dev.source, ind, readCh); } if (last_fill_index < next_fill_index) { @@ -1577,7 +1249,7 @@ std::cerr << "\t" << s->go.span[i].to->label << " " << s->next->label << std::en { bool readCh = false; s->emit(output, ind, readCh, condName); - genGoto(o, ind, s->go, s, s->next, readCh); + s->go.emit(o, ind, readCh); } if (cFlag && bFlag && BitMap::first) diff --git a/re2c/go.cc b/re2c/go.cc index 88d3c493..49bcb64a 100644 --- a/re2c/go.cc +++ b/re2c/go.cc @@ -1,10 +1,66 @@ #include "dfa.h" #include "go.h" +#include "indent.h" #include "print.h" namespace re2c { +static std::string space(uint this_label) +{ + int nl = next_label > 999999 ? 6 : next_label > 99999 ? 5 : next_label > 9999 ? 4 : next_label > 999 ? 3 : next_label > 99 ? 2 : next_label > 9 ? 1 : 0; + int tl = this_label > 999999 ? 6 : this_label > 99999 ? 5 : this_label > 9999 ? 4 : this_label > 999 ? 3 : this_label > 99 ? 2 : this_label > 9 ? 
1 : 0; + return std::string(std::max(1, nl - tl + 1), ' '); +} + +static std::string output_yych (bool & readCh) +{ + if (readCh) + { + readCh = false; + return "(" + input_api.expr_peek_save () + ")"; + } + else + { + return mapCodeName["yych"]; + } +} + +static void output_if (OutputFile & o, uint ind, bool & readCh, const std::string & compare, uint value) +{ + o << indent(ind) << "if (" << output_yych (readCh) << " " << compare << " "; + o.write_char_hex (value); + o << ") "; +} + +static void output_goto (OutputFile & o, uint ind, bool & readCh, uint to) +{ + if (readCh) + { + o << input_api.stmt_peek (ind); + readCh = false; + } + o << indent (ind) << "goto " << labelPrefix << to << ";\n"; + vUsedLabels.insert(to); +} + +static std::string output_hgo (OutputFile & o, uint ind, bool & readCh, SwitchIf * hgo) +{ + std::string yych = output_yych (readCh); + if (hgo != NULL) + { + o << indent (ind) << "if (" << yych <<" & ~0xFF) {\n"; + hgo->emit (o, ind + 1, readCh); + o << indent (ind) << "} else "; + yych = mapCodeName["yych"]; + } + else + { + o << indent (ind); + } + return yych; +} + uint Span::show (std::ostream & o, uint lb) const { if (to) @@ -15,23 +71,347 @@ uint Span::show (std::ostream & o, uint lb) const return ub; } +void Case::emit (OutputFile & o, uint ind) +{ + for (uint i = 0; i < ranges.size (); ++i) + { + for (uint b = ranges[i].first; b < ranges[i].second; ++b) + { + o << indent (ind) << "case "; + o.write_char_hex (b); + o << ":"; + if (dFlag && encoding.is (Enc::EBCDIC)) + { + const uint c = encoding.decodeUnsafe (b); + if (isprint (c)) + o << " /* " << std::string (1, c) << " */"; + } + bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; + if (!last_case) + { + o << "\n"; + } + } + } +} + +Cases::Cases (const Span * span, uint span_size) + : def (span_size == 0 ? 
NULL : span[span_size - 1].to) + , cases (new Case[span_size]) + , cases_size (0) +{ + for (uint i = 0, lb = 0; i < span_size; ++ i) + { + add (lb, span[i].ub, span[i].to); + lb = span[i].ub; + } +} + +Cases::~Cases () +{ + delete [] cases; +} + +void Cases::add (uint lb, uint ub, State * to) +{ + for (uint i = 0; i < cases_size; ++i) + { + if (cases[i].to == to) + { + cases[i].ranges.push_back (std::make_pair (lb, ub)); + return; + } + } + cases[cases_size].ranges.push_back (std::make_pair (lb, ub)); + cases[cases_size].to = to; + ++cases_size; +} + +void Cases::emit (OutputFile & o, uint ind, bool & readCh) +{ + o << indent(ind) << "switch (" << output_yych (readCh) << ") {\n"; + for (uint i = 0; i < cases_size; ++i) + { + if (cases[i].to != def) + { + cases[i].emit (o, ind); + output_goto (o, 1, readCh, cases[i].to->label); + } + } + o << indent (ind) << "default:"; + output_goto (o, 1, readCh, def->label); + o << indent (ind) << "}\n"; +} + +Cond::Cond (const std::string & cmp, uint val) + : compare (cmp) + , value (val) +{} + +Binary::Binary (const Span * s, uint n, const State * from, const State * next) + : cond (new Cond ("<=", s[n / 2 - 1].ub - 1)) + , thn (new If (n / 2 > 4 ? If::BINARY : If::LINEAR, &s[0], n / 2, from, next)) + , els (new If (n - n / 2 > 4 ? 
If::BINARY : If::LINEAR, &s[n / 2], n - n / 2, from, next)) +{} + +void Binary::emit (OutputFile & o, uint ind, bool & readCh) +{ + output_if (o, ind, readCh, cond->compare, cond->value); + o << "{\n"; + thn->emit (o, ind + 1, readCh); + o << indent (ind) << "} else {\n"; + els->emit (o, ind + 1, readCh); + o << indent (ind) << "}\n"; +} + +Linear::Linear (const Span * s, uint n, const State * from, const State * next) + : branches () +{ + for (;;) + { + const State *bg = s[0].to; + while (n >= 3 && s[2].to == bg && (s[1].ub - s[0].ub) == 1) + { + if (s[1].to == next && n == 3) + { + branches.push_back (std::make_pair (new Cond ("!=", s[0].ub), bg)); + return ; + } + else + { + branches.push_back (std::make_pair (new Cond ("==", s[0].ub), s[1].to)); + } + n -= 2; + s += 2; + } + if (n == 1) + { + if (s[0].to->label != from->label + 1) +// if (s[0].to->next && s[0].to->label != s[0].to->next->label + 1) + { + branches.push_back (std::make_pair (static_cast<const Cond *> (NULL), s[0].to)); /* NULL condition: Linear::emit outputs a bare goto for this branch */ + } + return; + } + else if (n == 2 && bg == next) + { + branches.push_back (std::make_pair (new Cond (">=", s[0].ub), s[1].to)); + return; + } + else + { + branches.push_back (std::make_pair (new Cond ("<=", s[0].ub - 1), bg)); + n -= 1; + s += 1; + } + } +} + +void Linear::emit (OutputFile & o, uint ind, bool & readCh) +{ + for (uint i = 0; i < branches.size (); ++i) + { + if (branches[i].first != NULL) + { + output_if (o, ind, readCh, branches[i].first->compare, branches[i].first->value); + output_goto (o, 0, readCh, branches[i].second->label); + } + else + { + output_goto (o, ind, readCh, branches[i].second->label); + } + } +} + +If::If (type_t t, const Span * sp, uint nsp, const State * from, const State * next) + : type (t) + , info () +{ + switch (type) + { + case BINARY: + info.binary = new Binary (sp, nsp, from, next); + break; + case LINEAR: + info.linear = new Linear (sp, nsp, from, next); + break; + } +} + +void If::emit (OutputFile & o, uint ind, bool & readCh) +{ + switch (type) + 
{ + case BINARY: + info.binary->emit (o, ind, readCh); + break; + case LINEAR: + info.linear->emit (o, ind, readCh); + break; + } +} + +SwitchIf::SwitchIf (const Span * sp, uint nsp, const State * from, const State * next) + : type (IF) + , info () +{ + if ((!sFlag && nsp > 2) || (nsp > 8 && (sp[nsp - 2].ub - sp[0].ub <= 3 * (nsp - 2)))) + { + type = SWITCH; + info.cases = new Cases (sp, nsp); + } + else if (nsp > 5) + { + info.ifs = new If (If::BINARY, sp, nsp, from, next); + } + else + { + info.ifs = new If (If::LINEAR, sp, nsp, from, next); + } +} + +void SwitchIf::emit (OutputFile & o, uint ind, bool & readCh) +{ + switch (type) + { + case SWITCH: + info.cases->emit (o, ind, readCh); + break; + case IF: + info.ifs->emit (o, ind, readCh); + break; + } +} + +Bitmap::Bitmap (const Span * span, uint nSpans, const Span * hspan, uint hSpans, const BitMap * bm, const State * bm_state, const State * from, const State * next) + : bitmap (bm) + , bitmap_state (bm_state) + , hgo (hSpans == 0 ? NULL : new SwitchIf (hspan, hSpans, from, next)) + , lgo (NULL) +{ + Span * bspan = new Span [nSpans]; + uint bSpans = unmap (bspan, span, nSpans, bm_state); + lgo = bSpans == 0 + ? 
NULL + : new SwitchIf (bspan, bSpans, from, next); + delete [] bspan; /* bspan was allocated with new [] (array form), so it must be released with delete [] */ +} + +void Bitmap::emit (OutputFile & o, uint ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o << "if (" << mapCodeName["yybm"] << "[" << bitmap->i << "+" << yych << "] & "; + if (yybmHexTable) + { + o.write_hex (bitmap->m); + } + else + { + o << (uint) bitmap->m; + } + o << ") {\n"; + output_goto (o, ind + 1, readCh, bitmap_state->label); + o << indent (ind) << "}\n"; + if (lgo != NULL) + { + lgo->emit (o, ind, readCh); + } +} + +CpgotoTable::CpgotoTable (const Span * span, uint nSpans) + : table (new uint [0x100]) +{ + uint c = 0; + for (uint i = 0; i < nSpans; ++i) + { + vUsedLabels.insert(span[i].to->label); + for(; c < span[i].ub && c < 0x100; ++c) + { + table[c] = span[i].to->label; + } + } +} + +void CpgotoTable::emit (OutputFile & o, uint ind) +{ + o << indent (ind) << "static void *" << mapCodeName["yytarget"] << "[256] = {\n"; + o << indent (++ind); + for (uint i = 0; i <= 0xFF; ++i) + { + o << "&&" << labelPrefix << table[i]; + if (i == 0xFF) + { + o << "\n"; + } + else if (i % 8 == 7) + { + o << ",\n" << indent (ind); + } + else + { + o << "," << space (table[i]); + } + } + o << indent (--ind) << "};\n"; +} + +Cpgoto::Cpgoto (const Span * span, uint nSpans, const Span * hspan, uint hSpans, const State * from, const State * next) + : hgo (hSpans == 0 ? 
NULL : new SwitchIf (hspan, hSpans, from, next)) + , table (new CpgotoTable (span, nSpans)) +{} + +void Cpgoto::emit (OutputFile & o, uint ind, bool & readCh) +{ + std::string yych = output_hgo (o, ind, readCh, hgo); + o << "{\n"; + table->emit (o, ++ind); + o << indent(ind) << "goto *" << mapCodeName["yytarget"] << "[" << yych << "];\n"; + o << indent(--ind) << "}\n"; +} + +Dot::Dot (const Span * sp, uint nsp, const State * s) + : from (s) + , cases (new Cases (sp, nsp)) +{} + +void Dot::emit (OutputFile & o) +{ + const uint n = cases->cases_size; + if (n == 1) + { + o << from->label << " -> " << cases->cases[0].to->label << "\n"; + } + else + { + for (uint i = 0; i < n; ++i) + { + o << from->label << " -> " << cases->cases[i].to->label << " [label=\""; + for (uint j = 0; j < cases->cases[i].ranges.size (); ++j) + { + o.write_range (cases->cases[i].ranges[j].first, cases->cases[i].ranges[j].second); + } + o << "\"]\n"; + } + } +} + Go::Go () : nSpans (0) - , hSpans (0) , span (NULL) - , hspan (NULL) - , nBitmaps (0) - , bitmaps (NULL) + , type (NONE) + , info () {} -Go::~Go () +void Go::init (const State * from, const State * next) { - delete [] bitmaps; -} + if (nSpans == 0) + { + return; + } -void Go::init () -{ // initialize high (wide) spans + uint hSpans = 0; + const Span * hspan = NULL; for (uint i = 0; i < nSpans; ++i) { if (span[i].ub > 0x100) @@ -41,29 +421,79 @@ void Go::init () break; } } + // initialize bitmaps - bitmaps = new const BitMap * [nSpans]; - memset (bitmaps, 0, nSpans * sizeof (BitMap *)); + uint nBitmaps = 0; + const BitMap * bitmap = NULL; + const State * bitmap_state = NULL; for (uint i = 0; i < nSpans; ++i) { if (span[i].to && span[i].to->isBase) { const BitMap *b = BitMap::find (span[i].to); - if (b && matches(b->go, b->on, this, span[i].to)) + if (b && matches(b->go->span, b->go->nSpans, b->on, span, nSpans, span[i].to)) { - bitmaps[i] = b; + if (bitmap == NULL) + { + bitmap = b; + bitmap_state = span[i].to; + } nBitmaps++; } } } + + 
const uint dSpans = nSpans - hSpans - nBitmaps; + if (DFlag) + { + type = DOT; + info.dot = new Dot (span, nSpans, from); + } + else if (gFlag && (dSpans >= cGotoThreshold)) + { + type = CPGOTO; + info.cpgoto = new Cpgoto (span, nSpans, hspan, hSpans, from, next); + } + else if (bFlag && (nBitmaps > 0)) + { + type = BITMAP; + info.bitmap = new Bitmap (span, nSpans, hspan, hSpans, bitmap, bitmap_state, from, next); + bUsedYYBitmap = true; + } + else + { + type = SWITCH_IF; + info.switchif = new SwitchIf (span, nSpans, from, next); + } } -// All spans in g1 that lead to s1 are pairwise equal to that in g2 leading to s2 -bool matches(const Go * g1, const State * s1, const Go * g2, const State * s2) +void Go::emit (OutputFile & o, uint ind, bool & readCh) { - Span *b1 = g1->span, *e1 = &b1[g1->nSpans]; + switch (type) + { + case NONE: + break; + case SWITCH_IF: + info.switchif->emit (o, ind, readCh); + break; + case BITMAP: + info.bitmap->emit (o, ind, readCh); + break; + case CPGOTO: + info.cpgoto->emit (o, ind, readCh); + break; + case DOT: + info.dot->emit (o); + break; + } +} + +// All spans in b1 that lead to s1 are pairwise equal to those in b2 leading to s2 +bool matches(const Span * b1, uint n1, const State * s1, const Span * b2, uint n2, const State * s2) +{ + const Span * e1 = &b1[n1]; uint lb1 = 0; - Span *b2 = g2->span, *e2 = &b2[g2->nSpans]; + const Span * e2 = &b2[n2]; uint lb2 = 0; for (;;) @@ -93,4 +523,33 @@ bool matches(const Go * g1, const State * s1, const Go * g2, const State * s2) } } +/* + * Find all spans, that map to the given state. For each of them, + * find upper adjacent span, that maps to another state (if such + * span exists, otherwise try lower one). + * If input contains single span that maps to the given state, + * then output contains 0 spans. 
+ */ +uint unmap (Span * new_span, const Span * old_span, uint old_nspans, const State * x) +{ + uint new_nspans = 0; + for (uint i = 0; i < old_nspans; ++i) + { + if (old_span[i].to != x) + { + if (new_nspans > 0 && new_span[new_nspans - 1].to == old_span[i].to) + new_span[new_nspans - 1].ub = old_span[i].ub; + else + { + new_span[new_nspans].to = old_span[i].to; + new_span[new_nspans].ub = old_span[i].ub; + ++new_nspans; + } + } + } + if (new_nspans > 0) + new_span[new_nspans - 1].ub = old_span[old_nspans - 1].ub; + return new_nspans; +} + } // namespace re2c diff --git a/re2c/go.h b/re2c/go.h index 9a3603ba..5753e870 100644 --- a/re2c/go.h +++ b/re2c/go.h @@ -2,6 +2,7 @@ #define _go_h #include +#include <vector> #include "basics.h" #include "code.h" @@ -9,31 +10,150 @@ namespace re2c { -class State; +class State; // forward +struct If; // forward struct Span { uint ub; State * to; + uint show (std::ostream&, uint) const; +}; + +struct Case +{ + std::vector<std::pair<uint, uint> > ranges; /* half-open [lb, ub) character ranges, as pushed by Cases::add */ + const State * to; + void emit (OutputFile & o, uint ind); +}; + +struct Cases +{ + const State * def; + Case * cases; + uint cases_size; + void add (uint lb, uint ub, State * to); + Cases (const Span * s, uint n); + ~Cases (); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct Cond +{ + std::string compare; + uint value; + Cond (const std::string & cmp, uint val); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct Binary +{ + Cond * cond; + If * thn; + If * els; + Binary (const Span * s, uint n, const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct Linear +{ + std::vector<std::pair<const Cond *, const State *> > branches; /* (condition, target) pairs; a NULL condition means an unconditional goto */ + Linear (const Span * s, uint n, const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; - uint show(std::ostream&, uint) const; +struct If +{ + enum type_t + { + BINARY, + LINEAR + } type; + union + { + Binary * binary; + Linear * linear; + } info; + If (type_t t, const Span * sp, uint nsp, 
const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct SwitchIf +{ + enum + { + SWITCH, + IF + } type; + union + { + Cases * cases; + If * ifs; + } info; + SwitchIf (const Span * sp, uint nsp, const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct Bitmap +{ + const BitMap * bitmap; + const State * bitmap_state; + SwitchIf * hgo; + SwitchIf * lgo; + Bitmap (const Span * span, uint nSpans, const Span * hspan, uint hSpans, const BitMap * bm, const State * bm_state, const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct CpgotoTable +{ + uint * table; + CpgotoTable (const Span * span, uint nSpans); + void emit (OutputFile & o, uint ind); +}; + +struct Cpgoto +{ + SwitchIf * hgo; + CpgotoTable * table; + Cpgoto (const Span * span, uint nSpans, const Span * hspan, uint hSpans, const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); +}; + +struct Dot +{ + const State * from; + Cases * cases; + Dot (const Span * sp, uint nsp, const State * from); + void emit (OutputFile & o); }; struct Go { uint nSpans; // number of spans - uint hSpans; // number of spans with upper bound > 0x100 Span * span; - Span * hspan; - uint nBitmaps; - const BitMap ** bitmaps; + enum + { + NONE, + SWITCH_IF, + BITMAP, + CPGOTO, + DOT + } type; + union + { + SwitchIf * switchif; + Bitmap * bitmap; + Cpgoto * cpgoto; + Dot * dot; + } info; Go (); - ~Go (); - void init (); + void init (const State * from, const State * next); + void emit (OutputFile & o, uint ind, bool & readCh); }; -bool matches(const Go * g1, const State * s1, const Go * g2, const State * s2); +bool matches(const Span * b1, uint n1, const State * s1, const Span * b2, uint n2, const State * s2); +uint unmap (Span * new_span, const Span * old_span, uint old_nspans, const State * x); } // namespace re2c -- 2.40.0