From: Ulya Trofimovich Date: Tue, 9 Jun 2015 14:28:58 +0000 (+0100) Subject: Distinct restricted type for rule priority. X-Git-Tag: 0.15~214 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d2ca05e0032a299b52a36cf3563a9d3ecd3ac801;p=re2c Distinct restricted type for rule priority. As with labels, try to control how rule priorities are created: make a special counter that creates new priorities and disallow everyone but this counter from doing it. --- diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 9edb3cdf..0f767835 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -34,6 +34,7 @@ SRC_HDR = \ src/dfa/dfa.h \ src/dfa/ins.h \ src/dfa/re.h \ + src/dfa/rule_rank.h \ src/dfa/state.h \ src/globals.h \ src/mbo_getopt.h \ @@ -43,6 +44,7 @@ SRC_HDR = \ src/parse/token.h \ src/util/allocate.h \ src/util/c99_stdint.h \ + src/util/counter.h \ src/util/forbid_copy.h \ src/util/free_list.h \ src/util/local_increment.h \ @@ -78,6 +80,7 @@ SRC = \ src/dfa/encoding/utf8/utf8_range.cc \ src/dfa/encoding/utf8/utf8_regexp.cc \ src/dfa/dfa.cc \ + src/dfa/rule_rank.cc \ src/main.cc \ src/mbo_getopt.cc \ src/parse/input.cc \ diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 8658cc91..87f33ad8 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -257,11 +257,11 @@ void emit_rule (OutputFile & o, uint32_t ind, const State * const s, const RuleO o << indent(ind); if (flag_skeleton) { - o << "{ if (cursor == &data[result[i].endpos] && result[i].rule == " << rule->accept << ") "; + o << "{ if (cursor == &data[result[i].endpos] && result[i].rule == " << rule->rank << ") "; o << "{ cursor = &data[result[i].startpos]; continue; }"; o << " else "; o << "{ printf (\"error: %lu/%u, %u/%u, '%s'\\n\", cursor - data, result[i].endpos, result[i].rule, " - << rule->accept + << rule->rank << ", &data[result[i].startpos]); return 1; } }"; } else if (rule->code->autogen) diff --git a/re2c/src/codegen/emit_dfa.cc 
b/re2c/src/codegen/emit_dfa.cc index adadafa2..147ce6da 100644 --- a/re2c/src/codegen/emit_dfa.cc +++ b/re2c/src/codegen/emit_dfa.cc @@ -70,7 +70,7 @@ void DFA::count_used_labels (std::set & used, label_t start, label_t in // In '-f' mode, default state is always state 0 if (fFlag) { - used.insert (label_counter_t::FIRST); + used.insert (label_t::first ()); } if (force_start) { diff --git a/re2c/src/codegen/go_emit.cc b/re2c/src/codegen/go_emit.cc index 1521a523..5da68ad6 100644 --- a/re2c/src/codegen/go_emit.cc +++ b/re2c/src/codegen/go_emit.cc @@ -190,7 +190,7 @@ void GoBitmap::emit (OutputFile & o, uint32_t ind, bool & readCh) label_t CpgotoTable::max_label () const { - label_t max = label_counter_t::FIRST; + label_t max = label_t::first (); for (uint32_t i = 0; i < TABLE_SIZE; ++i) { if (max < table[i]->label) @@ -205,7 +205,7 @@ void CpgotoTable::emit (OutputFile & o, uint32_t ind) { o << indent (ind) << "static void *" << mapCodeName["yytarget"] << "[256] = {\n"; o << indent (++ind); - const uint32_t max_digits = width (max_label ()); + const uint32_t max_digits = max_label ().width (); for (uint32_t i = 0; i < TABLE_SIZE; ++i) { o << "&&" << labelPrefix << table[i]->label; @@ -219,7 +219,7 @@ void CpgotoTable::emit (OutputFile & o, uint32_t ind) } else { - const uint32_t padding = max_digits - width (table[i]->label) + 1; + const uint32_t padding = max_digits - table[i]->label.width () + 1; o << "," << std::string (padding, ' '); } } diff --git a/re2c/src/codegen/label.cc b/re2c/src/codegen/label.cc index 9d170e32..c2e384fb 100644 --- a/re2c/src/codegen/label.cc +++ b/re2c/src/codegen/label.cc @@ -4,24 +4,39 @@ namespace re2c { -const label_t label_counter_t::FIRST (0); +const uint32_t label_t::FIRST = 0; -std::ostream & operator << (std::ostream & o, label_t l) +label_t::label_t () + : value (FIRST) +{} + +void label_t::inc () { - o << l.value; - return o; + ++value; +} + +label_t label_t::first () +{ + return label_t (); } -bool operator < (const label_t & 
l1, const label_t & l2) +bool label_t::operator < (const label_t & l) const { - return l1.value < l2.value; + return value < l.value; } -uint32_t width (label_t l) +uint32_t label_t::width () const { - uint32_t digits = 0; - while (l.value /= 10) ++digits; - return digits; + uint32_t v = value; + uint32_t n = 0; + while (v /= 10) ++n; + return n; +} + +std::ostream & operator << (std::ostream & o, label_t l) +{ + o << l.value; + return o; } } // namespace re2c diff --git a/re2c/src/codegen/label.h b/re2c/src/codegen/label.h index 75085b14..edc64844 100644 --- a/re2c/src/codegen/label.h +++ b/re2c/src/codegen/label.h @@ -4,43 +4,33 @@ #include #include "src/util/c99_stdint.h" +#include "src/util/counter.h" namespace re2c { +// label public API: +// - get first label +// - compare labels +// - get label width +// - output label to std::ostream +// +// label private API (for label counter): +// - get initial label +// - get next label class label_t { + static const uint32_t FIRST; uint32_t value; - explicit label_t (uint32_t v) - : value (v) - {} - friend class label_counter_t; - friend std::ostream & operator << (std::ostream & o, label_t l); - friend bool operator < (const label_t & l1, const label_t & l2); - friend uint32_t width (label_t l); -}; + label_t (); + void inc (); -class label_counter_t -{ public: - static const label_t FIRST; - -private: - label_t label; + static label_t first (); + bool operator < (const label_t & l) const; + uint32_t width () const; + friend std::ostream & operator << (std::ostream & o, label_t l); -public: - label_counter_t () - : label (FIRST) - {} - label_t next () - { - label_t l = label; - ++label.value; - return l; - } - void reset () - { - label = FIRST; - } + friend class counter_t; }; } // namespace re2c diff --git a/re2c/src/codegen/output.cc b/re2c/src/codegen/output.cc index d5155d4e..30adfa58 100644 --- a/re2c/src/codegen/output.cc +++ b/re2c/src/codegen/output.cc @@ -156,6 +156,12 @@ OutputFile & operator << (OutputFile 
& u, label_t l) return u; } +OutputFile & operator << (OutputFile & u, rule_rank_t r) +{ + u.stream () << r; + return u; +} + void OutputFile::insert_code () { blocks.back ()->fragments.push_back (new OutputFragment (OutputFragment::CODE, 0)); diff --git a/re2c/src/codegen/output.h b/re2c/src/codegen/output.h index c6800ded..0cd4b7fe 100644 --- a/re2c/src/codegen/output.h +++ b/re2c/src/codegen/output.h @@ -7,6 +7,7 @@ #include #include "src/codegen/label.h" +#include "src/dfa/rule_rank.h" #include "src/util/c99_stdint.h" #include "src/util/forbid_copy.h" @@ -52,7 +53,7 @@ private: std::vector blocks; public: - label_counter_t label_counter; + counter_t label_counter; private: std::ostream & stream (); @@ -78,6 +79,7 @@ public: friend OutputFile & operator << (OutputFile & o, const std::string & s); friend OutputFile & operator << (OutputFile & o, const char * s); friend OutputFile & operator << (OutputFile & o, label_t l); + friend OutputFile & operator << (OutputFile & o, rule_rank_t l); void insert_line_info (); void insert_state_goto (uint32_t ind); diff --git a/re2c/src/codegen/prepare_dfa.cc b/re2c/src/codegen/prepare_dfa.cc index b092bfb7..c5ef6255 100644 --- a/re2c/src/codegen/prepare_dfa.cc +++ b/re2c/src/codegen/prepare_dfa.cc @@ -170,23 +170,23 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) } // create rule states - std::map rules; + std::map rules; for (State * s = head; s; s = s->next) { if (s->rule) { - if (rules.find (s->rule->accept) == rules.end ()) + if (rules.find (s->rule->rank) == rules.end ()) { State *n = new State; n->action.set_rule (s->rule); - rules[s->rule->accept] = n; + rules[s->rule->rank] = n; addState(&s->next, n); } for (uint32_t i = 0; i < s->go.nSpans; ++i) { if (!s->go.span[i].to) { - s->go.span[i].to = rules[s->rule->accept]; + s->go.span[i].to = rules[s->rule->rank]; } } } @@ -221,7 +221,7 @@ void DFA::prepare(OutputFile & o, uint32_t & max_fill) { if (!s->go.span[i].to->rule && s->go.span[i].to->action.type != 
Action::RULE) { - const uint32_t accept = accepts.find_or_add (rules[s->rule->accept]); + const uint32_t accept = accepts.find_or_add (rules[s->rule->rank]); s->action.set_save (accept); } } diff --git a/re2c/src/codegen/skeleton/path.cc b/re2c/src/codegen/skeleton/path.cc index 52c05b3c..ff2c6ca8 100644 --- a/re2c/src/codegen/skeleton/path.cc +++ b/re2c/src/codegen/skeleton/path.cc @@ -6,22 +6,22 @@ namespace re2c namespace skeleton { -Path::Path (const chars_t & cs, uint32_t l, uint32_t r) +Path::Path (const chars_t & cs, uint32_t l, rule_rank_t r) : chars (cs) , length (l) , rule (r) {} -void Path::update (uint32_t r) +void Path::update (rule_rank_t r) { - if (r != NO_RULE) + if (!r.is_none ()) { length = chars.size (); rule = r; } } -void Path::extend (uint32_t r, uint32_t c) +void Path::extend (rule_rank_t r, uint32_t c) { update (r); chars.push_back (c); @@ -29,7 +29,7 @@ void Path::extend (uint32_t r, uint32_t c) void Path::append (const Path * p) { - if (p->rule != NO_RULE) + if (!p->rule.is_none ()) { length = chars.size () + p->length; rule = p->rule; diff --git a/re2c/src/codegen/skeleton/path.h b/re2c/src/codegen/skeleton/path.h index 440bb3e8..1d38af3b 100644 --- a/re2c/src/codegen/skeleton/path.h +++ b/re2c/src/codegen/skeleton/path.h @@ -3,6 +3,7 @@ #include +#include "src/dfa/rule_rank.h" #include "src/util/c99_stdint.h" namespace re2c @@ -11,19 +12,17 @@ namespace re2c namespace skeleton { -const uint32_t NO_RULE = 0xFFFFffff; - struct Path { typedef std::vector chars_t; chars_t chars; uint32_t length; - uint32_t rule; + rule_rank_t rule; - Path (const chars_t & cs, uint32_t l, uint32_t r); - void update (uint32_t r); - void extend (uint32_t r, uint32_t c); + Path (const chars_t & cs, uint32_t l, rule_rank_t r); + void update (rule_rank_t r); + void extend (rule_rank_t r, uint32_t c); void append (const Path * p); }; diff --git a/re2c/src/codegen/skeleton/skeleton.cc b/re2c/src/codegen/skeleton/skeleton.cc index 76cc2f77..ca243c86 100644 --- 
a/re2c/src/codegen/skeleton/skeleton.cc +++ b/re2c/src/codegen/skeleton/skeleton.cc @@ -14,14 +14,14 @@ const uint32_t Node::UNKNOWN_LEN = 0xFFFFffff; Node::Node (const State * s, const s2n_map & s2n) : arcs () , loop (0) - , rule (NO_RULE) + , rule (rule_rank_t::none ()) , path_len (UNKNOWN_LEN) , path (NULL) { const bool is_accepting = s && s->rule; if (is_accepting) { - rule = s->rule->accept; + rule = s->rule->rank; } const bool is_final = !s || (s->go.nSpans == 1 && !s->go.span[0].to); @@ -219,7 +219,7 @@ Skeleton::~Skeleton () void Skeleton::generate_paths (std::vector & results) { std::vector prefixes; - prefixes.push_back (Path (Path::chars_t (), 0, NO_RULE)); + prefixes.push_back (Path (Path::chars_t (), 0, rule_rank_t::none ())); if (nodes->estimate_size_all (1, 0) == DATA_LIMIT) { diff --git a/re2c/src/codegen/skeleton/skeleton.h b/re2c/src/codegen/skeleton/skeleton.h index 3ca28a2a..b283dbf9 100644 --- a/re2c/src/codegen/skeleton/skeleton.h +++ b/re2c/src/codegen/skeleton/skeleton.h @@ -31,7 +31,7 @@ struct Node uint8_t loop; // rule number for corresponding DFA state (if any) - uint32_t rule; + rule_rank_t rule; // stuff for constructing path cover (for large graphs) static const uint32_t UNKNOWN_LEN; diff --git a/re2c/src/dfa/actions.cc b/re2c/src/dfa/actions.cc index 80b05104..01a7ee08 100644 --- a/re2c/src/dfa/actions.cc +++ b/re2c/src/dfa/actions.cc @@ -50,7 +50,7 @@ const Ins* showIns(std::ostream &o, const Ins &i, const Ins &base) break; case TERM: - o << "term " << ((RuleOp*) i.i.link)->accept; + o << "term " << ((RuleOp*) i.i.link)->rank; break; } @@ -880,11 +880,11 @@ RegExp * Scanner::mkDefault() const return new MatchOp(def); } -RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, uint32_t a, InsAccess access) +RuleOp::RuleOp(RegExp *e, RegExp *c, Token *t, rule_rank_t r, InsAccess access) : exp(e) , ctx(c) , ins(NULL) - , accept(a) + , rank(r) , code(t) , line(0) { diff --git a/re2c/src/dfa/dfa.cc b/re2c/src/dfa/dfa.cc index 922a893b..246e2121 
100644 --- a/re2c/src/dfa/dfa.cc +++ b/re2c/src/dfa/dfa.cc @@ -78,7 +78,7 @@ DFA::DFA(Ins *ins, uint32_t ni, uint32_t lb, uint32_t ub, const Char *rep) } else if (i->i.tag == TERM) { - if (!s->rule || ((RuleOp*) i->i.link)->accept < s->rule->accept) + if (!s->rule || ((RuleOp*) i->i.link)->rank < s->rule->rank) s->rule = (RuleOp*) i->i.link; } else if (i->i.tag == CTXT) diff --git a/re2c/src/dfa/re.h b/re2c/src/dfa/re.h index 483f1d02..f6c73c68 100644 --- a/re2c/src/dfa/re.h +++ b/re2c/src/dfa/re.h @@ -9,6 +9,7 @@ #include #include "src/dfa/ins.h" +#include "src/dfa/rule_rank.h" #include "src/globals.h" #include "src/parse/token.h" #include "src/util/range.h" @@ -164,12 +165,12 @@ private: public: RegExp *ctx; Ins *ins; - uint32_t accept; + rule_rank_t rank; Token *code; uint32_t line; public: - RuleOp(RegExp*, RegExp*, Token*, uint32_t, InsAccess); + RuleOp(RegExp*, RegExp*, Token*, rule_rank_t, InsAccess); ~RuleOp() { diff --git a/re2c/src/dfa/rule_rank.cc b/re2c/src/dfa/rule_rank.cc new file mode 100644 index 00000000..4120ff28 --- /dev/null +++ b/re2c/src/dfa/rule_rank.cc @@ -0,0 +1,42 @@ +#include + +#include "src/dfa/rule_rank.h" + +namespace re2c +{ + +const uint32_t rule_rank_t::NONE = 0xFFFFffff; + +rule_rank_t::rule_rank_t () + : value (0) +{} + +void rule_rank_t::inc () +{ + ++value; +} + +rule_rank_t rule_rank_t::none () +{ + rule_rank_t r; + r.value = NONE; + return r; +} + +bool rule_rank_t::is_none () const +{ + return value == NONE; +} + +bool rule_rank_t::operator < (const rule_rank_t & r) const +{ + return value < r.value; +} + +std::ostream & operator << (std::ostream & o, rule_rank_t r) +{ + o << r.value; + return o; +} + +} // namespace re2c diff --git a/re2c/src/dfa/rule_rank.h b/re2c/src/dfa/rule_rank.h new file mode 100644 index 00000000..1c461365 --- /dev/null +++ b/re2c/src/dfa/rule_rank.h @@ -0,0 +1,39 @@ +#ifndef __RULE_RANK__ +#define __RULE_RANK__ + +#include + +#include "src/util/c99_stdint.h" +#include "src/util/counter.h" + 
+namespace re2c +{ + +// rule rank public API: +// - get rule rank corresponding to nonexistent rule +// - check if rank corresponds to nonexistent rule +// - compare ranks +// - output rank to std::ostream +// +// rule rank private API (for rule rank counter): +// - get first rank +// - get next rank +class rule_rank_t +{ + static const uint32_t NONE; + uint32_t value; + rule_rank_t (); + void inc (); + +public: + static rule_rank_t none (); + bool is_none () const; + bool operator < (const rule_rank_t & r) const; + friend std::ostream & operator << (std::ostream & o, rule_rank_t r); + + friend class counter_t; +}; + +} // namespace re2c + +#endif // __RULE_RANK__ diff --git a/re2c/src/dfa/state.h b/re2c/src/dfa/state.h index bb06b71c..0261ed60 100644 --- a/re2c/src/dfa/state.h +++ b/re2c/src/dfa/state.h @@ -26,7 +26,7 @@ public: Action action; State () - : label (label_counter_t::FIRST) + : label (label_t::first ()) , rule (NULL) , next (0) , link (NULL) diff --git a/re2c/src/parse/parser.ypp b/re2c/src/parse/parser.ypp index 5e09281a..164f7db5 100644 --- a/re2c/src/parse/parser.ypp +++ b/re2c/src/parse/parser.ypp @@ -26,7 +26,7 @@ int yylex(); void yyerror(const char*); } -static uint32_t accept; +static counter_t rank_counter; static re2c::RegExpMap specMap; static RegExp *spec = NULL, *specNone = NULL; static RuleOpList specStar; @@ -87,7 +87,7 @@ void context_rule(CondList *clist, RegExp *expr, RegExp *look, const std::string for(CondList::const_iterator it = clist->begin(); it != clist->end(); ++it) { Token *token = new Token(code, in->get_fname (), in->get_cline (), newcond);//condcpy); - RuleOp *rule = new RuleOp(expr, look, token, accept++, ins_access); + RuleOp *rule = new RuleOp(expr, look, token, rank_counter.next (), ins_access); RegExpMap::iterator itRE = specMap.find(*it); @@ -228,7 +228,7 @@ rule: { in->fatal("condition or '<*>' required when using -c switch"); } - $$ = new RuleOp($1, $2, $3, accept++, RegExp::SHARED); + $$ = new RuleOp($1, $2, $3, 
rank_counter.next (), RegExp::SHARED); spec = spec? mkAlt(spec, $$) : $$; } | STAR CODE /* default rule */ @@ -270,7 +270,7 @@ rule: Token *token = new Token($7, $7->source, $7->line, $6); delete $7; delete $6; - specStar.push_back(new RuleOp($4, $5, token, accept++, RegExp::PRIVATE)); + specStar.push_back(new RuleOp($4, $5, token, rank_counter.next (), RegExp::PRIVATE)); } | '<' STAR '>' expr look ':' newcond { @@ -278,7 +278,7 @@ rule: context_check(NULL); Token *token = new Token(NULL, in->get_fname (), in->get_cline (), $7); delete $7; - specStar.push_back(new RuleOp($4, $5, token, accept++, RegExp::PRIVATE)); + specStar.push_back(new RuleOp($4, $5, token, rank_counter.next (), RegExp::PRIVATE)); } | '<' STAR '>' look newcond CODE { @@ -307,7 +307,7 @@ rule: Token *token = new Token($3, $3->source, $3->line, $2); delete $2; delete $3; - $$ = specNone = new RuleOp(new NullOp(), new NullOp(), token, accept++, RegExp::SHARED); + $$ = specNone = new RuleOp(new NullOp(), new NullOp(), token, rank_counter.next (), RegExp::SHARED); } | NOCOND ':' newcond { @@ -319,7 +319,7 @@ rule: } Token *token = new Token(NULL, in->get_fname (), in->get_cline (), $3); delete $3; - $$ = specNone = new RuleOp(new NullOp(), new NullOp(), token, accept++, RegExp::SHARED); + $$ = specNone = new RuleOp(new NullOp(), new NullOp(), token, rank_counter.next (), RegExp::SHARED); } | SETUP STAR '>' CODE { @@ -544,7 +544,7 @@ void parse(Scanner& i, Output & o) { dfa_map.clear(); } - accept = 0; + rank_counter.reset (); spec = NULL; ruleDefault = NULL; in->set_in_parse(true); @@ -561,7 +561,7 @@ void parse(Scanner& i, Output & o) dfa_map.clear(); parse_cleanup(); spec = NULL; - accept = 0; + rank_counter.reset (); ruleDefault = NULL; in->set_in_parse(true); yyparse(); @@ -590,7 +590,7 @@ void parse(Scanner& i, Output & o) // now that all rules have been parsed, we can fix it for (RuleOpList::const_iterator itOp = specStar.begin(); itOp != specStar.end(); ++itOp) { - (*itOp)->accept = accept++; 
+ (*itOp)->rank = rank_counter.next (); } // merge <*> rules to all conditions // note that all conditions use the same regexp for <*> rules, @@ -651,7 +651,7 @@ void parse(Scanner& i, Output & o) itRuleDefault = ruleDefaultMap.find(it->first); if (itRuleDefault != ruleDefaultMap.end()) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), itRuleDefault->second, accept++, RegExp::SHARED); + RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), itRuleDefault->second, rank_counter.next (), RegExp::SHARED); it->second.second = it->second.second ? mkAlt(def, it->second.second) : def; } else @@ -659,7 +659,7 @@ void parse(Scanner& i, Output & o) itRuleDefault = ruleDefaultMap.find("*"); if (itRuleDefault != ruleDefaultMap.end()) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), itRuleDefault->second, accept++, RegExp::SHARED); + RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), itRuleDefault->second, rank_counter.next (), RegExp::SHARED); it->second.second = it->second.second ? mkAlt(def, it->second.second) : def; } } @@ -677,7 +677,7 @@ void parse(Scanner& i, Output & o) { if (ruleDefault != NULL && parseMode != Scanner::Reuse) { - RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), ruleDefault, accept++, RegExp::SHARED); + RuleOp * def = new RuleOp(in->mkDefault(), new NullOp(), ruleDefault, rank_counter.next (), RegExp::SHARED); spec = spec ? mkAlt(def, spec) : def; } if (spec || !dfa_map.empty()) diff --git a/re2c/src/util/counter.h b/re2c/src/util/counter.h new file mode 100644 index 00000000..4047f844 --- /dev/null +++ b/re2c/src/util/counter.h @@ -0,0 +1,29 @@ +#ifndef __RE2C_UTIL_COUNTER__ +#define __RE2C_UTIL_COUNTER__ + +namespace re2c { + +template +class counter_t +{ + num_t num; + +public: + counter_t () + : num () + {} + num_t next () + { + num_t n = num; + num.inc (); + return n; + } + void reset () + { + num = num_t (); + } +}; + +} // namespace re2c + +#endif // __RE2C_UTIL_COUNTER__