From 286d738a019d9584e014186ecbcf9979a407325c Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Mon, 31 Oct 2016 15:59:57 +0000 Subject: [PATCH] Restricted 'Tagpool' lifetime to DFA construction phase. --- re2c/Makefile.am | 2 + re2c/src/codegen/emit_action.cc | 2 +- re2c/src/ir/adfa/adfa.cc | 2 - re2c/src/ir/adfa/adfa.h | 1 - re2c/src/ir/dfa/determinization.cc | 2 +- re2c/src/ir/dfa/dfa.h | 2 +- re2c/src/ir/dfa/fallback_tags.cc | 2 +- re2c/src/ir/dfa/tag_liveness.cc | 13 ++---- re2c/src/ir/dfa/tag_optimize.cc | 4 +- re2c/src/ir/dfa/tag_optimize.h | 2 +- re2c/src/ir/dfa/tag_renaming.cc | 73 ++++++++++-------------------- re2c/src/ir/dfa/tagpool.cc | 63 ++++++++++++++++++++++++++ re2c/src/ir/dfa/tagpool.h | 34 ++++++++++++++ re2c/src/ir/nfa/init_rules.cc | 7 ++- re2c/src/ir/nfa/nfa.cc | 4 +- re2c/src/ir/nfa/nfa.h | 6 +-- re2c/src/ir/rule.h | 8 +++- re2c/src/ir/skeleton/skeleton.cc | 2 +- re2c/src/ir/tag.cc | 56 ----------------------- re2c/src/ir/tag.h | 23 +--------- 20 files changed, 148 insertions(+), 160 deletions(-) create mode 100644 re2c/src/ir/dfa/tagpool.cc create mode 100644 re2c/src/ir/dfa/tagpool.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 7bbc82f0..778cc478 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -26,6 +26,7 @@ SRC_HDR = \ src/ir/dfa/dfa.h \ src/ir/dfa/find_state.h \ src/ir/dfa/tag_optimize.h \ + src/ir/dfa/tagpool.h \ src/ir/nfa/nfa.h \ src/ir/regexp/encoding/case.h \ src/ir/regexp/encoding/enc.h \ @@ -105,6 +106,7 @@ SRC = \ src/ir/dfa/tag_liveness.cc \ src/ir/dfa/tag_optimize.cc \ src/ir/dfa/tag_renaming.cc \ + src/ir/dfa/tagpool.cc \ src/ir/regexp/encoding/enc.cc \ src/ir/regexp/encoding/range_suffix.cc \ src/ir/regexp/encoding/utf8/utf8_regexp.cc \ diff --git a/re2c/src/codegen/emit_action.cc b/re2c/src/codegen/emit_action.cc index 6295b92e..9c153621 100644 --- a/re2c/src/codegen/emit_action.cc +++ b/re2c/src/codegen/emit_action.cc @@ -427,7 +427,7 @@ void gen_fintags(OutputFile &o, uint32_t ind, const DFA &dfa, const Rule &rule) { const bool generic = opts->input_api.type() == InputAPI::CUSTOM; const std::valarray &tags = dfa.tags; - const tagver_t *vers = dfa.tagpool[rule.tags]; + const tagver_t *vers = rule.tags; // trailing context if (rule.trail != Tag::NONE) { diff --git a/re2c/src/ir/adfa/adfa.cc b/re2c/src/ir/adfa/adfa.cc index 226c5bdf..63fcd1d7 100644 --- a/re2c/src/ir/adfa/adfa.cc +++ b/re2c/src/ir/adfa/adfa.cc @@ -35,7 +35,6 @@ DFA::DFA , head(NULL) , rules(dfa.rules) , tags(dfa.tags) - , tagpool(dfa.tagpool) , max_fill (0) , need_backup (false) , need_accept (false) @@ -97,7 +96,6 @@ DFA::~DFA() delete skeleton; delete &rules; delete &tags; - delete &tagpool; } /* note [reordering DFA states] diff --git a/re2c/src/ir/adfa/adfa.h b/re2c/src/ir/adfa/adfa.h index fb0d8f7f..b7354bb3 100644 --- a/re2c/src/ir/adfa/adfa.h +++ b/re2c/src/ir/adfa/adfa.h @@ -68,7 +68,6 @@ struct DFA State * head; std::valarray &rules; std::valarray &tags; - Tagpool &tagpool; size_t max_fill; bool need_backup; bool need_accept; diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index 3a93da4a..c16f39a4 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -55,10 +55,10 @@ dfa_t::dfa_t(const nfa_t &nfa, , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) , tags(*nfa.tags) - , tagpool(*nfa.tagpool) , maxtagver(0) { const size_t ntag = tags.size(); + Tagpool tagpool(ntag); clospool_t clospool; closure_t clos1, clos2; bool *badtags = new bool[ntag](); diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index 7fa5eb24..b2df17f7 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -9,6 +9,7 @@ #include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" #include "src/ir/tag.h" +#include "src/ir/dfa/tagpool.h" #include "src/util/forbid_copy.h" namespace re2c @@ -50,7 +51,6 @@ struct dfa_t const size_t nchars; std::valarray &rules; std::valarray &tags; - Tagpool &tagpool; tagver_t maxtagver; dfa_t(const nfa_t &nfa, const charset_t &charset, diff --git a/re2c/src/ir/dfa/fallback_tags.cc b/re2c/src/ir/dfa/fallback_tags.cc index e850e548..0956df2e 100644 --- a/re2c/src/ir/dfa/fallback_tags.cc +++ b/re2c/src/ir/dfa/fallback_tags.cc @@ -78,7 +78,7 @@ void insert_fallback_tags(dfa_t &dfa) std::fill(owrt, owrt + nver, false); find_overwritten_tags(dfa, i, been, owrt); - const tagver_t *fin = dfa.tagpool[dfa.rules[s->rule].tags]; + const tagver_t *fin = dfa.rules[s->rule].tags; for (const tagsave_t *p = s->rule_tags.save; p; p = p->next) { owrt[p->ver] = false; } diff --git a/re2c/src/ir/dfa/tag_liveness.cc b/re2c/src/ir/dfa/tag_liveness.cc index 2df92cf9..739b9c14 100644 --- a/re2c/src/ir/dfa/tag_liveness.cc +++ b/re2c/src/ir/dfa/tag_liveness.cc @@ -7,11 +7,10 @@ namespace re2c void tag_liveness(const cfg_t &cfg, bool *live) { - const Tagpool &tagpool = cfg.dfa.tagpool; const size_t nbb = cfg.nbblock, nver = static_cast(cfg.dfa.maxtagver) + 1, - ntag = tagpool.ntags; + ntag = cfg.dfa.tags.size(); bool *buf1 = new bool[nver]; bool *buf2 = new bool[nver]; @@ -32,11 +31,10 @@ void tag_liveness(const cfg_t &cfg, bool *live) const cfg_bb_t *b = cfg.bblocks + i; memset(buf1, 0, nver * sizeof(bool)); - if (b->use != TAGVER_ZERO) { + if (b->use) { // final bblock, no successors - const tagver_t *use = tagpool[b->use]; for (size_t t = 0; t < ntag; ++t) { - const tagver_t u = use[t]; + const tagver_t u = b->use[t]; if (u != TAGVER_ZERO) { buf1[u] = true; } @@ -83,12 +81,11 @@ void tag_liveness(const cfg_t &cfg, bool *live) for (cfg_ix_t i = 0; i < nbb; ++i) { const cfg_bb_t *b = cfg.bblocks + i; - if (b->use == TAGVER_ZERO) continue; + if (!b->use) continue; - const tagver_t *use = tagpool[b->use]; memset(buf1, 0, nver * sizeof(bool)); for (size_t t = 0; t < ntag; ++t) { - const tagver_t u = use[t]; + const tagver_t u = b->use[t]; if (u != TAGVER_ZERO) { buf1[u] = true; } diff --git a/re2c/src/ir/dfa/tag_optimize.cc b/re2c/src/ir/dfa/tag_optimize.cc index 305c2c72..8a208dea 100644 --- a/re2c/src/ir/dfa/tag_optimize.cc +++ b/re2c/src/ir/dfa/tag_optimize.cc @@ -8,7 +8,7 @@ namespace re2c static cfg_ix_t map_arcs_to_bblocks(const dfa_t &dfa, cfg_ix_t *arc2bb); static cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbblock); -static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, const tagcmd_t *cmd, size_t use); +static void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, const cfg_ix_t *succe, const tagcmd_t *cmd, tagver_t *use); static void successors(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x); static void fallback(const dfa_t &dfa, const cfg_ix_t *arc2bb, bool *been, cfg_ix_t *&succ, size_t x); @@ -122,7 +122,7 @@ cfg_bb_t *create_bblocks(const dfa_t &dfa, const cfg_ix_t *arc2bb, cfg_ix_t nbbl } void basic_block(cfg_bb_t *bb, const cfg_ix_t *succb, - const cfg_ix_t *succe, const tagcmd_t *cmd, size_t use) + const cfg_ix_t *succe, const tagcmd_t *cmd, tagver_t *use) { const size_t n = static_cast(succe - succb); cfg_ix_t *s = new cfg_ix_t[n]; diff --git a/re2c/src/ir/dfa/tag_optimize.h b/re2c/src/ir/dfa/tag_optimize.h index 4bb0bac3..5252621a 100644 --- a/re2c/src/ir/dfa/tag_optimize.h +++ b/re2c/src/ir/dfa/tag_optimize.h @@ -13,7 +13,7 @@ struct cfg_bb_t cfg_ix_t *succb; cfg_ix_t *succe; tagcmd_t *cmd; - size_t use; + tagver_t *use; }; // control flow graph diff --git a/re2c/src/ir/dfa/tag_renaming.cc b/re2c/src/ir/dfa/tag_renaming.cc index 1582910d..4e954b87 100644 --- a/re2c/src/ir/dfa/tag_renaming.cc +++ b/re2c/src/ir/dfa/tag_renaming.cc @@ -3,66 +3,41 @@ namespace re2c { -static void rename_rule(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new); -static void rename_save(tagsave_t **psave, const tagver_t *ver2new); -static void rename_copy(tagcopy_t **pcopy, const tagver_t *ver2new); - void tag_renaming(cfg_t &cfg, const tagver_t *ver2new, tagver_t maxver) { tagver_t &oldmax = cfg.dfa.maxtagver; - if (maxver >= oldmax) { - assert(maxver == oldmax); - return; - } + if (oldmax == maxver) return; oldmax = maxver; cfg_bb_t *b = cfg.bblocks, *e = b + cfg.nbblock; for (; b < e; ++b) { - rename_save(&b->cmd->save, ver2new); - rename_copy(&b->cmd->copy, ver2new); - } - - std::valarray &rules = cfg.dfa.rules; - Tagpool &tagpool = cfg.dfa.tagpool; - for (size_t i = 0; i < rules.size(); ++i) { - rename_rule(tagpool, rules[i].tags, ver2new); - } -} - -void rename_rule(Tagpool &tagpool, size_t &tags, const tagver_t *ver2new) -{ - if (tags == ZERO_TAGS) return; - - const tagver_t *oldver = tagpool[tags]; - tagver_t *newver = tagpool.buffer1; - const size_t ntag = tagpool.ntags; - for (size_t t = 0; t < ntag; ++t) { - const tagver_t v = oldver[t]; - newver[t] = v != TAGVER_ZERO - ? ver2new[v] - : v; - } - - tags = tagpool.insert(newver); -} + // tag versions in save commands + for (tagsave_t *p = b->cmd->save; p; p = p->next) { + p->ver = ver2new[p->ver]; + } -void rename_save(tagsave_t **psave, const tagver_t *ver2new) -{ - for (tagsave_t *p = *psave; p; p = p->next) { - p->ver = ver2new[p->ver]; + // tag versions in copy commands + for (tagcopy_t *c, **pc = &b->cmd->copy; (c = *pc);) { + c->lhs = ver2new[c->lhs]; + c->rhs = ver2new[c->rhs]; + if (c->lhs == c->rhs) { + *pc = c->next; + } else { + pc = &c->next; + } + } } -} -void rename_copy(tagcopy_t **pcopy, const tagver_t *ver2new) -{ - for (tagcopy_t *c, **pc = pcopy; (c = *pc);) { - c->lhs = ver2new[c->lhs]; - c->rhs = ver2new[c->rhs]; - if (c->lhs == c->rhs) { - *pc = c->next; - } else { - pc = &c->next; + // final tag versions in rules + std::valarray &rules = cfg.dfa.rules; + for (size_t r = 0, t = 0; r < rules.size(); ++r) { + Rule &rule = rules[r]; + for (; t < rule.htag; ++t) { + tagver_t &v = rule.tags[t]; + if (v != TAGVER_ZERO) { + v = ver2new[v]; + } } } } diff --git a/re2c/src/ir/dfa/tagpool.cc b/re2c/src/ir/dfa/tagpool.cc new file mode 100644 index 00000000..1311f06d --- /dev/null +++ b/re2c/src/ir/dfa/tagpool.cc @@ -0,0 +1,63 @@ +#include +#include // malloc +#include // memcpy, memcmp + +#include "src/ir/dfa/tagpool.h" +#include "src/util/hash32.h" + +namespace re2c +{ + +struct eqtag_t +{ + size_t ntags; + + explicit eqtag_t(size_t n): ntags(n) {} + inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const + { + return memcmp(x, y, ntags * sizeof(tagver_t)) == 0; + } +}; + +Tagpool::Tagpool(size_t n) + : lookup() + , buffer(new tagver_t[n * 3]) + , ntags(n) + , buffer1(&buffer[n * 1]) + , buffer2(&buffer[n * 2]) +{ + // all-zero tag configuration must have static number zero + std::fill(buffer, buffer + ntags, TAGVER_ZERO); + assert(ZERO_TAGS == insert(buffer)); +} + +Tagpool::~Tagpool() +{ + delete[] buffer; + const size_t n = lookup.size(); + for (size_t i = 0; i < n; ++i) { + free(const_cast(lookup[i])); + } +} + +size_t Tagpool::insert(const tagver_t *tags) +{ + const size_t size = ntags * sizeof(tagver_t); + const uint32_t hash = hash32(0, tags, size); + + const size_t idx = lookup.find_with(hash, tags, eqtag_t(ntags)); + if (idx != taglookup_t::NIL) { + return idx; + } + + tagver_t *copy = static_cast(malloc(size)); + memcpy(copy, tags, size); + return lookup.push(hash, copy); +} + +const tagver_t *Tagpool::operator[](size_t idx) const +{ + return lookup[idx]; +} + +} // namespace re2c diff --git a/re2c/src/ir/dfa/tagpool.h b/re2c/src/ir/dfa/tagpool.h new file mode 100644 index 00000000..f7dddfb1 --- /dev/null +++ b/re2c/src/ir/dfa/tagpool.h @@ -0,0 +1,34 @@ +#ifndef _RE2C_IR_DFA_TAGPOOL_ +#define _RE2C_IR_DFA_TAGPOOL_ + +#include "src/ir/tag.h" +#include "src/util/lookup.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +static const size_t ZERO_TAGS = 0; + +struct Tagpool +{ +private: + typedef lookup_t taglookup_t; + taglookup_t lookup; + tagver_t *buffer; + +public: + const size_t ntags; + tagver_t *buffer1; + tagver_t *buffer2; + + explicit Tagpool(size_t n); + ~Tagpool(); + size_t insert(const tagver_t *tags); + const tagver_t *operator[](size_t idx) const; + FORBID_COPY(Tagpool); +}; + +} // namespace re2c + +#endif // _RE2C_IR_DFA_TAGPOOL_ diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc index d884cb17..b48022e2 100644 --- a/re2c/src/ir/nfa/init_rules.cc +++ b/re2c/src/ir/nfa/init_rules.cc @@ -35,8 +35,7 @@ static void assert_tags_used_once(const Rule &rule, void init_rules(const std::vector ®exps, std::valarray &rules, - const std::valarray &tags, - Tagpool &tagpool) + const std::valarray &tags) { const size_t nr = rules.size(); const size_t nt = tags.size(); @@ -50,14 +49,14 @@ void init_rules(const std::vector ®exps, rule.htag = t; // mark *all* variable tags, including trailing context - tagver_t *vers = tagpool.buffer1; + tagver_t *vers = new tagver_t[nt]; std::fill(vers, vers + nt, TAGVER_ZERO); for (size_t i = rule.ltag; i < rule.htag; ++i) { if (tags[i].type == Tag::VAR) { vers[i] = static_cast(i + 1); } } - rule.tags = tagpool.insert(vers); + rule.tags = vers; // tags in trailing context are forbidden (they make no sense), // and since tags are constructed in reversed order, this implies diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index dc922458..b2dc638a 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -8,20 +8,18 @@ nfa_t::nfa_t(const std::vector ®exps) , states(NULL) , rules(*new std::valarray(regexps.size())) , tags(NULL) - , tagpool(NULL) , root(NULL) { size_t ntags = 0; max_size = counters(regexps, ntags); - tagpool = new Tagpool(ntags); tags = new std::valarray(ntags); make_tags(regexps, *tags); states = new nfa_state_t[max_size]; regexps2nfa(regexps, *this); - init_rules(regexps, rules, *tags, *tagpool); + init_rules(regexps, rules, *tags); } nfa_t::~nfa_t() diff --git a/re2c/src/ir/nfa/nfa.h b/re2c/src/ir/nfa/nfa.h index be376179..11fdc89d 100644 --- a/re2c/src/ir/nfa/nfa.h +++ b/re2c/src/ir/nfa/nfa.h @@ -77,7 +77,6 @@ struct nfa_t nfa_state_t *states; std::valarray &rules; std::valarray *tags; - Tagpool *tagpool; nfa_state_t *root; nfa_t(const std::vector &rs); @@ -90,10 +89,7 @@ size_t counters(const std::vector ®exps, size_t &ntags); void make_tags(const std::vector ®exps, std::valarray &tags); void regexps2nfa(const std::vector ®exps, nfa_t &nfa); bool nullable_rule(const RegExpRule *rule); -void init_rules(const std::vector ®exps, - std::valarray &rules, - const std::valarray &tags, - Tagpool &tagpool); +void init_rules(const std::vector ®exps, std::valarray &rules, const std::valarray &tags); } // namespace re2c diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index b1dd91d6..9d7437fa 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -38,7 +38,7 @@ struct Rule size_t ltag; size_t htag; size_t trail; - size_t tags; + tagver_t *tags; std::set shadow; Rule() @@ -46,9 +46,13 @@ struct Rule , ltag(0) , htag(0) , trail(Tag::NONE) - , tags(ZERO_TAGS) + , tags(NULL) , shadow() {} + ~Rule() + { + delete[] tags; + } FORBID_COPY(Rule); }; diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index 562fa49d..2cf84078 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -112,7 +112,7 @@ Skeleton::Skeleton( if (s->rule != Rule::NONE) { trail = dfa.rules[s->rule].trail; if (trail != Tag::NONE) { - trver = dfa.tagpool[dfa.rules[s->rule].tags][trail]; + trver = dfa.rules[s->rule].tags[trail]; } } diff --git a/re2c/src/ir/tag.cc b/re2c/src/ir/tag.cc index 391761ed..44b0eb3c 100644 --- a/re2c/src/ir/tag.cc +++ b/re2c/src/ir/tag.cc @@ -1,26 +1,11 @@ -#include #include -#include // malloc -#include // memcpy, memcmp #include "src/ir/rule.h" #include "src/ir/tag.h" -#include "src/util/hash32.h" namespace re2c { -struct eqtag_t -{ - size_t ntags; - - explicit eqtag_t(size_t n): ntags(n) {} - inline tagver_t operator()(const tagver_t *x, const tagver_t *y) const - { - return memcmp(x, y, ntags * sizeof(tagver_t)) == 0; - } -}; - const size_t Tag::NONE = std::numeric_limits::max(); Tag::Tag() @@ -46,47 +31,6 @@ void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d) tag.fix.dist = d; } -Tagpool::Tagpool(size_t n) - : lookup() - , buffer(new tagver_t[n * 3]) - , ntags(n) - , buffer1(&buffer[n * 1]) - , buffer2(&buffer[n * 2]) -{ - // all-zero tag configuration must have static number zero - std::fill(buffer, buffer + ntags, TAGVER_ZERO); - assert(ZERO_TAGS == insert(buffer)); -} - -Tagpool::~Tagpool() -{ - delete[] buffer; - const size_t n = lookup.size(); - for (size_t i = 0; i < n; ++i) { - free(const_cast(lookup[i])); - } -} - -size_t Tagpool::insert(const tagver_t *tags) -{ - const size_t size = ntags * sizeof(tagver_t); - const uint32_t hash = hash32(0, tags, size); - - const size_t idx = lookup.find_with(hash, tags, eqtag_t(ntags)); - if (idx != taglookup_t::NIL) { - return idx; - } - - tagver_t *copy = static_cast(malloc(size)); - memcpy(copy, tags, size); - return lookup.push(hash, copy); -} - -const tagver_t *Tagpool::operator[](size_t idx) const -{ - return lookup[idx]; -} - free_list tagsave_t::freelist; tagsave_t::tagsave_t(tagsave_t *n, tagver_t v) diff --git a/re2c/src/ir/tag.h b/re2c/src/ir/tag.h index 541ea228..91557baa 100644 --- a/re2c/src/ir/tag.h +++ b/re2c/src/ir/tag.h @@ -3,7 +3,7 @@ #include -#include "src/util/lookup.h" +#include "src/util/c99_stdint.h" #include "src/util/forbid_copy.h" #include "src/util/free_list.h" @@ -34,27 +34,6 @@ struct Tag void init_var_tag(Tag &tag, size_t r, const std::string *n); void init_fix_tag(Tag &tag, size_t r, const std::string *n, size_t b, size_t d); -static const size_t ZERO_TAGS = 0; - -struct Tagpool -{ -private: - typedef lookup_t taglookup_t; - taglookup_t lookup; - tagver_t *buffer; - -public: - const size_t ntags; - tagver_t *buffer1; - tagver_t *buffer2; - - explicit Tagpool(size_t n); - ~Tagpool(); - size_t insert(const tagver_t *tags); - const tagver_t *operator[](size_t idx) const; - FORBID_COPY(Tagpool); -}; - struct tagsave_t { static free_list freelist; -- 2.50.0