From 7cea5109651304819dd5e18af933bf15f4a45761 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 8 Jun 2015 22:25:32 +0100 Subject: [PATCH] Simplified creation of rule states and backup states. New simpler implementation uses STL containers instaed of sparse array. It's less efficient, but the place is not a bottleneck and simplicity is more important than efficiency. --- re2c/Makefile.am | 1 + re2c/src/codegen/emit_action.cc | 4 +-- re2c/src/codegen/emit_dfa.cc | 4 +-- re2c/src/codegen/prepare_dfa.cc | 24 +++-------------- re2c/src/dfa/action.h | 3 ++- re2c/src/util/uniq_vector.h | 48 +++++++++++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 25 deletions(-) create mode 100644 re2c/src/util/uniq_vector.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 4937eb60..9edb3cdf 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -49,6 +49,7 @@ SRC_HDR = \ src/util/range.h \ src/util/smart_ptr.h \ src/util/substr.h \ + src/util/uniq_vector.h \ src/util/wrap_iterator.h SRC = \ src/codegen/bitmap.cc \ diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index c82f8b16..8658cc91 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -217,14 +217,14 @@ void emit_accept (OutputFile & o, uint32_t ind, bool & readCh, const State * con genGoTo(o, 0, s, accepts[i], readCh); } o << indent(ind) << "default:\t"; - genGoTo(o, 0, s, accepts.back (), readCh); + genGoTo(o, 0, s, accepts[accepts_size - 1], readCh); o << indent(ind) << "}\n"; } } else { // no need to write if statement here since there is only case 0. - genGoTo(o, ind, s, accepts.front (), readCh); + genGoTo(o, ind, s, accepts[0], readCh); } } } diff --git a/re2c/src/codegen/emit_dfa.cc b/re2c/src/codegen/emit_dfa.cc index 65148c9a..adadafa2 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -80,9 +80,9 @@ void DFA::count_used_labels (std::set & used, label_t start, label_t in { s->go.used_labels (used); } - for (accept_t::const_iterator i = accepts.begin (); i != accepts.end (); ++i) + for (uint32_t i = 0; i < accepts.size (); ++i) { - used.insert ((*i)->label); + used.insert (accepts[i]->label); } // must go last: it needs the set of used labels if (used.count (head->label)) diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index b908c178..b092bfb7 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -160,8 +160,6 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) findSCCs(); head->link = head; - uint32_t nRules = 0; - for (State * s = head; s; s = s->next) { s->depth = maxDist(s); @@ -169,20 +167,15 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) { max_fill = s->depth; } - if (s->rule && s->rule->accept >= nRules) - { - nRules = s->rule->accept + 1; - } } // create rule states - State ** rules = new State * [nRules]; - memset(rules, 0, (nRules)*sizeof(*rules)); + std::map rules; for (State * s = head; s; s = s->next) { if (s->rule) { - if (!rules[s->rule->accept]) + if (rules.find (s->rule->accept) == rules.end ()) { State *n = new State; n->action.set_rule (s->rule); @@ -220,9 +213,6 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) // find backup states and create accept state (if needed) if (default_state) { - uint32_t nSaves = 0; - uint32_t * saves = new uint32_t[nRules]; - memset(saves, ~0, (nRules)*sizeof(*saves)); for (State * s = head; s; s = s->next) { if (s->rule) @@ -231,24 +221,18 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) { if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != Action::RULE) { - if (saves[s->rule->accept] == ~0u) - { - saves[s->rule->accept] = nSaves++; - accepts.push_back (rules[s->rule->accept]); - } - s->action.set_save (saves[s->rule->accept]); + const uint32_t accept = accepts.find_or_add (rules[s->rule->accept]); + s->action.set_save (accept); } } } } - delete [] saves; if (accepts.size () > 1) { o.set_used_yyaccept (); } default_state->action.set_accept (&accepts); } - delete [] rules; // split ``base'' states into two parts for (State * s = head; s; s = s->next) diff --git a/re2c/src/dfa/action.h b/re2c/src/dfa/action.h index 0d33805a..d5914817 100644 --- a/re2c/src/dfa/action.h +++ b/re2c/src/dfa/action.h @@ -5,6 +5,7 @@ #include "src/codegen/label.h" #include "src/util/c99_stdint.h" +#include "src/util/uniq_vector.h" namespace re2c { @@ -24,7 +25,7 @@ struct Initial {} }; -typedef std::vector accept_t; +typedef uniq_vector_t accept_t; class Action { diff --git a/re2c/src/util/uniq_vector.h b/re2c/src/util/uniq_vector.h new file mode 100644 index 00000000..0e003a83 --- /dev/null +++ b/re2c/src/util/uniq_vector.h @@ -0,0 +1,48 @@ +#ifndef __UNIQ_VECTOR__ +#define __UNIQ_VECTOR__ + +#include + +#include "src/util/c99_stdint.h" + +namespace re2c +{ + +// wrapper over std::vector +// O(n) lookup +// O(n^2) insertion +template +class uniq_vector_t +{ + typedef std::vector elems_t; + elems_t elems; +public: + uniq_vector_t () + : elems () + {} + uint32_t size () const + { + return elems.size (); + } + const value_t & operator [] (uint32_t i) const + { + return elems[i]; + } + uint32_t find_or_add (const value_t & v) + { + const uint32_t size = elems.size (); + for (uint32_t i = 0; i < size; ++i) + { + if (elems[i] == v) + { + return i; + } + } + elems.push_back (v); + return size; + } +}; + +} // namespace re2c + +#endif // __UNIQ_VECTOR__ -- 2.40.0