From 841c06300be714fdba6563d7734109bb17381e35 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Thu, 5 Mar 2015 16:47:25 +0000 Subject: [PATCH] Simplified switch generation in codegen. --- re2c/Makefile.am | 4 +-- re2c/cases.cc | 55 +++++++++++++++++++++++++++++++ re2c/cases.h | 34 ++++++++++++++++++++ re2c/code.cc | 84 +++++++++--------------------------------------- 4 files changed, 106 insertions(+), 71 deletions(-) create mode 100644 re2c/cases.cc create mode 100644 re2c/cases.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index e446b3a8..03ce4ed0 100755 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -2,10 +2,10 @@ bin_PROGRAMS = re2c win_BINARIES = $(WINBUILDDIR)/re2c.exe -re2c_SOURCES = code.cc dfa.cc main.cc parser.cc actions.cc scanner.re substr.cc range.cc \ +re2c_SOURCES = cases.cc code.cc dfa.cc main.cc parser.cc actions.cc scanner.re substr.cc range.cc \ translate.cc scanner.cc mbo_getopt.cc print.cc input.cc input_api.cc output.cc \ enc.cc utf8.cc utf8_range.cc utf8_regexp.cc utf16.cc utf16_range.cc utf16_regexp.cc range_suffix.cc \ - basics.h code.h code_names.h dfa.h enc.h indent.h input.h input_api.h free_list.h globals.h ins.h \ + basics.h cases.h code.h code_names.h dfa.h enc.h indent.h input.h input_api.h free_list.h globals.h ins.h \ mbo_getopt.h parser.h print.h range.h range_suffix.h re.h \ scanner.h smart_ptr.h substr.h token.h output.h \ utf16.h utf16_range.h utf16_regexp.h utf8.h utf8_range.h utf8_regexp.h diff --git a/re2c/cases.cc b/re2c/cases.cc new file mode 100644 index 00000000..e1fc3465 --- /dev/null +++ b/re2c/cases.cc @@ -0,0 +1,55 @@ +#include "cases.h" + +namespace re2c { + +Cases::Cases (const Span * span, uint span_size) + : def (span[span_size - 1].to) + , cases (new Case[span_size - 1]) + , cases_size (0) +{ + for (uint i = 0, lb = 0; i < span_size - 1; ++ i) + { + if (span[i].to != def) + { + add (lb, span[i].ub, span[i].to); + } + lb = span[i].ub; + } +} + +void Cases::add (uint lb, uint ub, State * to) +{ + for 
(uint i = 0; i < cases_size; ++i) + { + if (cases[i].to == to) + { + cases[i].ranges.push_back (std::make_pair (lb, ub)); + return; + } + } + cases[cases_size].ranges.push_back (std::make_pair (lb, ub)); + cases[cases_size].to = to; + ++cases_size; +} + +uint Cases::size () const +{ + return cases_size; +} + +State * Cases::default_case () const +{ + return def; +} + +const Case & Cases::operator [] (uint i) const +{ + return cases[i]; +} + +Cases::~Cases () +{ + delete [] cases; +} + +} // namespace re2c diff --git a/re2c/cases.h b/re2c/cases.h new file mode 100644 index 00000000..52ec707f --- /dev/null +++ b/re2c/cases.h @@ -0,0 +1,34 @@ +#ifndef _cases_h +#define _cases_h + +#include + +#include "dfa.h" + +namespace re2c { + +struct Case +{ + std::vector > ranges; + State * to; +}; + +class Cases +{ + State * def; + Case * cases; + uint cases_size; + + void add (uint lb, uint ub, State * to); + +public: + Cases (const Span * s, uint n); + ~Cases (); + uint size () const; + State * default_case () const; + const Case & operator [] (uint i) const; +}; + +} //namespace re2c + +#endif // _cases_h diff --git a/re2c/code.cc b/re2c/code.cc index 65d0d5c5..fb247c0c 100644 --- a/re2c/code.cc +++ b/re2c/code.cc @@ -10,6 +10,7 @@ #include #include +#include "cases.h" #include "code.h" #include "globals.h" #include "dfa.h" @@ -871,41 +872,32 @@ static void genCasesD(OutputFile & o, uint lb, Span *s, bool &newLine, const Sta } } -static bool genCases(OutputFile & o, uint ind, uint lb, Span *s, bool &newLine, uint mask) +static bool genCases (OutputFile & o, uint ind, const std::vector > & ranges, uint mask) { bool used = false; - if (!newLine) - { - o << "\n"; - } - newLine = true; - if (lb < s->ub) + for (uint i = 0; i < ranges.size (); ++i) { - for (;;) + for (uint b = ranges[i].first; b < ranges[i].second; ++b) { - if (!mask || lb > 0x00FF) + if (!mask || b > 0x00FF) // FIXME: delete this condition, check somewhere in unmap { o << indent(ind) << "case "; - 
o.write_char_hex (lb); + o.write_char_hex (b); o << ":"; if (dFlag && encoding.is(Enc::EBCDIC)) { - const uint c = encoding.decodeUnsafe(lb); + const uint c = encoding.decodeUnsafe(b); if (isprint(c)) o << " /* " << std::string(1, c) << " */"; } - newLine = false; used = true; } - - if (++lb == s->ub) + bool last_case = i == ranges.size () - 1 && b == ranges[i].second - 1; + if (!last_case) { - break; + o << "\n"; } - - o << "\n"; - newLine = true; } } @@ -983,26 +975,13 @@ void Go::genSwitchD (OutputFile & o, const State *from) const void Go::genSwitch(OutputFile & o, uint ind, const State *from, const State *next, bool &readCh, uint mask) const { - bool newLine = true; - if ((mask ? wSpans : nSpans) <= 2) { genLinear(o, ind, from, next, readCh, mask); } else { - State *def = span[nSpans - 1].to; - Span **sP = new Span * [nSpans - 1], **r, **s, **t; - - t = &sP[0]; - - for (uint i = 0; i < nSpans; ++i) - { - if (span[i].to != def) - { - *(t++) = &span[i]; - } - } + Cases cases (span, nSpans); if (dFlag) { @@ -1019,48 +998,15 @@ void Go::genSwitch(OutputFile & o, uint ind, const State *from, const State *nex o << indent(ind) << "switch (" << mapCodeName["yych"] << ") {\n"; } - while (t != &sP[0]) + for (uint i = 0; i < cases.size (); ++i) { - bool used = false; - - r = s = &sP[0]; - - const State *to = (*s)->to; - - if (*s == &span[0]) - { - used |= genCases(o, ind, 0, *s, newLine, mask); - } - else - { - used |= genCases(o, ind, (*s)[ -1].ub, *s, newLine, mask); - } - - while (++s < t) - { - if ((*s)->to == to) - { - used |= genCases(o, ind, (*s)[ -1].ub, *s, newLine, mask); - } - else - { - *(r++) = *s; - } - } - - if (used) - { - genGoTo(o, newLine ? 
ind+1 : 1, from, to, readCh); - newLine = true; - } - t = r; + genCases (o, ind, cases[i].ranges, mask); + genGoTo(o, 1, from, cases[i].to, readCh); } o << indent(ind) << "default:"; - genGoTo(o, 1, from, def, readCh); + genGoTo(o, 1, from, cases.default_case (), readCh); o << indent(ind) << "}\n"; - - delete [] sP; } } -- 2.40.0