From a61dee4c666be842ac49a07f6adc718159e40a32 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 17 May 2016 17:46:05 +0100 Subject: [PATCH] Use tagpool to store and manipulate intermediate tag sets. Tagpool now has a special buffer to accommodate the needs of intermediate calculations involving tag sets. Tagpool is in charge of these calculations now; it has various functions to merge, mask and whatever one has to do with tag sets. --- re2c/Makefile.am | 1 + re2c/src/ir/dfa/determinization.cc | 6 +- re2c/src/ir/dfa/tag_deduplication.cc | 114 +++++++++------------------ re2c/src/ir/nfa/init_rules.cc | 9 ++- re2c/src/ir/nfa/nfa.cc | 4 +- re2c/src/ir/nfa/nfa.h | 5 +- re2c/src/ir/rule.h | 8 +- re2c/src/ir/skeleton/skeleton.cc | 13 +-- re2c/src/ir/skeleton/skeleton.h | 1 - re2c/src/ir/tagpool.cc | 97 +++++++++++++++++++++++ re2c/src/ir/tagpool.h | 62 +++------------ 11 files changed, 167 insertions(+), 153 deletions(-) create mode 100644 re2c/src/ir/tagpool.cc diff --git a/re2c/Makefile.am b/re2c/Makefile.am index f4deee0e..3c1597a0 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -114,6 +114,7 @@ SRC = \ src/ir/skeleton/skeleton.cc \ src/ir/skeleton/unreachable_nullable.cc \ src/ir/tag.cc \ + src/ir/tagpool.cc \ src/main.cc \ src/parse/code.cc \ src/parse/input.cc \ diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index ba9c85d4..7b748166 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -137,7 +137,7 @@ dfa_t::dfa_t(const nfa_t &nfa, , nchars(charset.size() - 1) // (n + 1) bounds for n ranges , rules(nfa.rules) , tags(*nfa.tags) - , tagpool(*new Tagpool(tags.size())) + , tagpool(*nfa.tagpool) { const size_t ntags = tags.size(); const size_t nrules = rules.size(); @@ -178,7 +178,7 @@ dfa_t::dfa_t(const nfa_t &nfa, for (; charset[c] != r->lower(); ++c); for (; charset[c] != r->upper(); ++c) { merge_tags_with_mask(&arctags[c * ntags], newtags, - &mask[c * ntags], rules[m->rule].tags, + &mask[c * ntags], tagpool[rules[m->rule].tags], badtags, ntags); arcs[c].push_back(m); } @@ -187,7 +187,7 @@ dfa_t::dfa_t(const nfa_t &nfa, } case nfa_state_t::FIN: merge_tags_with_mask(&arctags[nchars * ntags], newtags, - &mask[nchars * ntags], rules[n->rule].tags, + &mask[nchars * ntags], tagpool[rules[n->rule].tags], badtags, ntags); fin[n->rule] = true; break; diff --git a/re2c/src/ir/dfa/tag_deduplication.cc b/re2c/src/ir/dfa/tag_deduplication.cc index 3ef91e06..bca0d22e 100644 --- a/re2c/src/ir/dfa/tag_deduplication.cc +++ b/re2c/src/ir/dfa/tag_deduplication.cc @@ -5,19 +5,6 @@ namespace re2c { -static size_t mask_dead_tags(Tagpool &tagpool, - size_t oldidx, const bool *live) -{ - const bool *oldtags = tagpool[oldidx]; - bool *newtags = new bool[tagpool.ntags]; - for (size_t i = 0; i < tagpool.ntags; ++i) { - newtags[i] = oldtags[i] & live[i]; - } - const size_t newidx = tagpool.insert(newtags); - delete[] newtags; - return newidx; -} - static bool dangling_arcs(const size_t *arcs, size_t narcs) { for (size_t i = 0; i < narcs; ++i) { @@ -30,37 +17,38 @@ static bool dangling_arcs(const size_t *arcs, size_t narcs) /* note [liveness analyses on tags] * - * Tag T is alive in state S if either: + * Tag T is alive in state S if there is a transition + * from S to some state S' that does not update T + * and either: * - * - There is a transition from S to default state that does not set T, - * S is final and T belongs to tag set associated with rule in S. + * - S' is the default state + * and S is final + * and T belongs to S's final tag set * - * - There is a transition from S to default state that does not set T - * and T belongs to any tag set associated with fallback rules. + * - S' is the default state + * and S is not final + * and T belongs to fallback tag set * - * - There is a transition from S to some state S' (maybe equal to S) - * that does not set T and T is alive in S'. + * - S' is not the default state + * and T is alive in S' */ -static void calc_live(const dfa_t &dfa, const bool *fallback, bool *live) +static void calc_live(const dfa_t &dfa, size_t fallback, size_t *live) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; if (dangling_arcs(s->arcs, dfa.nchars)) { if (s->rule != Rule::NONE) { // final state, only rule tags are alive - add_tags_with_mask(&live[i * ntags], - dfa.rules[s->rule].tags, - dfa.tagpool[s->rule_tags], - ntags); + dfa.tagpool.orl_with_mask(&live[i], + dfa.rules[s->rule].tags, s->rule_tags); } else { // transition to default state and dispatch on // 'yyaccept': all fallback rules are potentially // reachable, their tags are alive // no mask: no rule implies no rule tags - add_tags(&live[i * ntags], fallback, ntags); + dfa.tagpool.orl(&live[i], fallback); } } } @@ -72,33 +60,28 @@ static void calc_live(const dfa_t &dfa, const bool *fallback, bool *live) for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { - loop |= addcmp_tags_with_mask(&live[i * ntags], - &live[j * ntags], - dfa.tagpool[s->tags[c]], - ntags); + const size_t old = live[i]; + dfa.tagpool.orl_with_mask(&live[i], live[j], s->tags[c]); + loop |= old != live[i]; } } } } } -static void mask_dead(dfa_t &dfa, - const bool *livetags) +static void mask_dead(dfa_t &dfa, const size_t *live) { const size_t nstates = dfa.states.size(); - const size_t ntags = dfa.tags.size(); for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { - s->tags[c] = mask_dead_tags(dfa.tagpool, - s->tags[c], &livetags[j * ntags]); + dfa.tagpool.andl(&s->tags[c], live[j]); } } if (s->rule != Rule::NONE) { - s->rule_tags = mask_dead_tags(dfa.tagpool, - s->rule_tags, dfa.rules[s->rule].tags); + dfa.tagpool.andl(&s->rule_tags, dfa.rules[s->rule].tags); } } } @@ -106,10 +89,11 @@ static void mask_dead(dfa_t &dfa, // tags that are updated here are pairwise incompatible // with all tags that are alive, but not updated here static void incompatible(bool *tbl, - const bool *live, - const bool *tags, - size_t ntags) + Tagpool &tagpool, size_t l, size_t t) { + const size_t ntags = tagpool.ntags; + const bool *live = tagpool[l]; + const bool *tags = tagpool[t]; for (size_t i = 0; i < ntags; ++i) { if (live[i] && !tags[i]) { for (size_t j = 0; j < ntags; ++j) { @@ -122,8 +106,7 @@ static void incompatible(bool *tbl, } static void incompatibility_table(const dfa_t &dfa, - const bool *livetags, - const bool *deftags, + const size_t *livetags, size_t deftags, bool *incompattbl) { const size_t nstates = dfa.states.size(); @@ -133,23 +116,17 @@ static void incompatibility_table(const dfa_t &dfa, for (size_t c = 0; c < dfa.nchars; ++c) { const size_t j = s->arcs[c]; if (j != dfa_t::NIL) { - incompatible(incompattbl, - &livetags[j * ntags], - dfa.tagpool[s->tags[c]], - ntags); + incompatible(incompattbl, dfa.tagpool, + livetags[j], s->tags[c]); } } if (dangling_arcs(s->arcs, dfa.nchars)) { if (s->rule != Rule::NONE) { - incompatible(incompattbl, - dfa.rules[s->rule].tags, - dfa.tagpool[s->rule_tags], - ntags); + incompatible(incompattbl, dfa.tagpool, + dfa.rules[s->rule].tags, s->rule_tags); } else { - incompatible(incompattbl, - deftags, - dfa.tagpool[s->rule_tags], - ntags); + incompatible(incompattbl, dfa.tagpool, + deftags, s->rule_tags); } } } @@ -177,7 +154,6 @@ static void incompatibility_table(const dfa_t &dfa, * Finding minimal clique cover in arbitrary graph is NP-complete. * We build just some cover (not necessarily minimal). * The algorithm takes quadratic (in the number of tags) time. - * static void equivalence_classes(const std::vector &incompattbl, */ static void equivalence_classes(const bool *incompattbl, size_t ntags, std::vector &represent) @@ -212,30 +188,15 @@ static void equivalence_classes(const bool *incompattbl, } } -static size_t patch_tagset(Tagpool &tagpool, size_t oldidx, - const std::vector &represent) -{ - const bool *oldtags = tagpool[oldidx]; - bool *newtags = new bool[tagpool.ntags](); - for (size_t i = 0; i < tagpool.ntags; ++i) { - if (oldtags[i]) { - newtags[represent[i]] = true; - } - } - const size_t newidx = tagpool.insert(newtags); - delete[] newtags; - return newidx; -} - static void patch_tags(dfa_t &dfa, const std::vector &represent) { const size_t nstates = dfa.states.size(); for (size_t i = 0; i < nstates; ++i) { dfa_state_t *s = dfa.states[i]; for (size_t c = 0; c < dfa.nchars; ++c) { - s->tags[c] = patch_tagset(dfa.tagpool, s->tags[c], represent); + dfa.tagpool.subst(&s->tags[c], represent); } - s->rule_tags = patch_tagset(dfa.tagpool, s->rule_tags, represent); + dfa.tagpool.subst(&s->rule_tags, represent); } const size_t ntags = dfa.tags.size(); @@ -255,14 +216,14 @@ size_t deduplicate_tags(dfa_t &dfa, return 0; } - bool *fbtags = new bool[ntags](); + size_t fbtags = 0; for (size_t i = 0; i < fallback.size(); ++i) { const size_t r = dfa.states[fallback[i]]->rule; - add_tags(fbtags, dfa.rules[r].tags, ntags); + dfa.tagpool.orl(&fbtags, dfa.rules[r].tags); } const size_t nstates = dfa.states.size(); - bool *live = new bool[nstates * ntags](); + size_t *live = new size_t[nstates](); calc_live(dfa, fbtags, live); mask_dead(dfa, live); @@ -284,7 +245,6 @@ size_t deduplicate_tags(dfa_t &dfa, patch_tags(dfa, represent); } - delete[] fbtags; delete[] live; delete[] incompattbl; diff --git a/re2c/src/ir/nfa/init_rules.cc b/re2c/src/ir/nfa/init_rules.cc index 921714c7..dc3dc512 100644 --- a/re2c/src/ir/nfa/init_rules.cc +++ b/re2c/src/ir/nfa/init_rules.cc @@ -35,7 +35,8 @@ static void assert_tags_used_once(const Rule &rule, void init_rules(const std::vector ®exps, std::valarray &rules, - const std::valarray &tags) + const std::valarray &tags, + Tagpool &tagpool) { const size_t nr = rules.size(); const size_t nt = tags.size(); @@ -50,10 +51,12 @@ void init_rules(const std::vector ®exps, rule.htag = t; // mark *all* variable tags, including trailing context - rule.tags = new bool[nt](); + bool *mask = new bool[nt](); for (size_t i = rule.ltag; i < rule.htag; ++i) { - rule.tags[i] = tags[i].type == Tag::VAR; + mask[i] = tags[i].type == Tag::VAR; } + rule.tags = tagpool.insert(mask); + delete[] mask; // tags in trailing context are forbidden (they make no sense), // and since tags are constructed in reversed order, this implies diff --git a/re2c/src/ir/nfa/nfa.cc b/re2c/src/ir/nfa/nfa.cc index b2dc638a..dc922458 100644 --- a/re2c/src/ir/nfa/nfa.cc +++ b/re2c/src/ir/nfa/nfa.cc @@ -8,18 +8,20 @@ nfa_t::nfa_t(const std::vector ®exps) , states(NULL) , rules(*new std::valarray(regexps.size())) , tags(NULL) + , tagpool(NULL) , root(NULL) { size_t ntags = 0; max_size = counters(regexps, ntags); + tagpool = new Tagpool(ntags); tags = new std::valarray(ntags); make_tags(regexps, *tags); states = new nfa_state_t[max_size]; regexps2nfa(regexps, *this); - init_rules(regexps, rules, *tags); + init_rules(regexps, rules, *tags, *tagpool); } nfa_t::~nfa_t() diff --git a/re2c/src/ir/nfa/nfa.h b/re2c/src/ir/nfa/nfa.h index b3e60187..5d4f24e4 100644 --- a/re2c/src/ir/nfa/nfa.h +++ b/re2c/src/ir/nfa/nfa.h @@ -8,6 +8,7 @@ #include "src/ir/regexp/regexp.h" #include "src/ir/rule.h" +#include "src/ir/tagpool.h" #include "src/util/forbid_copy.h" namespace re2c @@ -76,6 +77,7 @@ struct nfa_t nfa_state_t *states; std::valarray &rules; std::valarray *tags; + Tagpool *tagpool; nfa_state_t *root; nfa_t(const std::vector &rs); @@ -90,7 +92,8 @@ void regexps2nfa(const std::vector ®exps, nfa_t &nfa); bool nullable_rule(const RegExpRule *rule); void init_rules(const std::vector ®exps, std::valarray &rules, - const std::valarray &tags); + const std::valarray &tags, + Tagpool &tagpool); } // namespace re2c diff --git a/re2c/src/ir/rule.h b/re2c/src/ir/rule.h index 0afadec8..0cfc6bdb 100644 --- a/re2c/src/ir/rule.h +++ b/re2c/src/ir/rule.h @@ -38,8 +38,8 @@ struct Rule size_t ltag; size_t htag; size_t trail; + size_t tags; bool nullable; - bool *tags; std::set shadow; bool reachable; @@ -48,15 +48,11 @@ struct Rule , ltag(0) , htag(0) , trail(Tag::NONE) + , tags(0) , nullable(false) - , tags(NULL) , shadow() , reachable(false) {} - ~Rule() - { - delete[] tags; - } FORBID_COPY(Rule); }; diff --git a/re2c/src/ir/skeleton/skeleton.cc b/re2c/src/ir/skeleton/skeleton.cc index b83b07b9..00140e21 100644 --- a/re2c/src/ir/skeleton/skeleton.cc +++ b/re2c/src/ir/skeleton/skeleton.cc @@ -44,11 +44,6 @@ void Node::init(const bool *ts, size_t r, } } -Node::~Node() -{ - delete[] tags; -} - bool Node::end() const { return arcs.size() == 0; @@ -72,7 +67,6 @@ Skeleton::Skeleton( , tags(dfa.tags) { const size_t nc = cs.size() - 1; - const size_t ntags = tags.size(); // initialize skeleton nodes for (size_t i = 0; i < nodes_count - 1; ++i) { @@ -93,13 +87,12 @@ Skeleton::Skeleton( // in skeleton we are only interested in trailing contexts // which may be attributed to states rather than transitions - bool *tags = new bool[ntags](); - add_tags(tags, dfa.tagpool[s->rule_tags], ntags); + size_t tags = s->rule_tags; for (size_t c = 0; c < nc; ++c) { - add_tags(tags, dfa.tagpool[s->tags[c]], ntags); + dfa.tagpool.orl(&tags, s->tags[c]); } - nodes[i].init(tags, s->rule, arcs); + nodes[i].init(dfa.tagpool[tags], s->rule, arcs); } // initialize size of key diff --git a/re2c/src/ir/skeleton/skeleton.h b/re2c/src/ir/skeleton/skeleton.h index 1161011f..7e54d372 100644 --- a/re2c/src/ir/skeleton/skeleton.h +++ b/re2c/src/ir/skeleton/skeleton.h @@ -39,7 +39,6 @@ struct Node const bool *tags; Node(); - ~Node(); void init(const bool *ts, size_t r, const std::vector > &arcs); bool end() const; diff --git a/re2c/src/ir/tagpool.cc b/re2c/src/ir/tagpool.cc new file mode 100644 index 00000000..3253ea29 --- /dev/null +++ b/re2c/src/ir/tagpool.cc @@ -0,0 +1,97 @@ +#include "src/ir/tagpool.h" +#include "src/util/forbid_copy.h" + +namespace re2c +{ + +Tagpool::Tagpool(size_t n) + : ntags(n) + , pool() + , buff(new bool[ntags]()) +{ + // all-no tag set must have number 0 + insert(buff); +} + +Tagpool::~Tagpool() +{ + delete[] buff; +} + +size_t Tagpool::insert(const bool *tags) +{ + return pool.insert(tags, ntags * sizeof(bool)); +} + +void Tagpool::orl(size_t *pt, size_t o) +{ + const size_t t = *pt; + if (t == o || o == 0) { + return; + } else if (t == 0) { + *pt = o; + return; + } + + const bool *tags = operator[](t); + const bool *ortags = operator[](o); + for (size_t i = 0; i < ntags; ++i) { + buff[i] = tags[i] | ortags[i]; + } + *pt = insert(buff); +} + +void Tagpool::orl_with_mask(size_t *pt, size_t o, size_t m) +{ + const size_t t = *pt; + if (t == o || o == 0) { + return; + } + + const bool *tags = operator[](t); + const bool *ortags = operator[](o); + const bool *masktags = operator[](m); + for (size_t i = 0; i < ntags; ++i) { + buff[i] = tags[i] | (ortags[i] & ~masktags[i]); + } + *pt = insert(buff); +} + +void Tagpool::andl(size_t *pt, size_t a) +{ + const size_t t = *pt; + if (t == a) { + return; + } else if (t == 0 || a == 0) { + *pt = 0; + return; + } + + const bool *tags = operator[](t); + const bool *andtags = operator[](a); + for (size_t i = 0; i < ntags; ++i) { + buff[i] = tags[i] & andtags[i]; + } + *pt = insert(buff); +} + +void Tagpool::subst(size_t *pt, const std::vector &represent) +{ + const bool *tags = operator[](*pt); + memset(buff, 0, ntags * sizeof(bool)); + for (size_t i = 0; i < ntags; ++i) { + if (tags[i]) { + buff[represent[i]] = true; + } + } + *pt = insert(buff); +} + +const bool *Tagpool::operator[](size_t idx) +{ + const bool *tags; + pool.deref(idx, tags); + return tags; +} + +} // namespace re2c diff --git a/re2c/src/ir/tagpool.h b/re2c/src/ir/tagpool.h index d5a8dca1..9b972965 100644 --- a/re2c/src/ir/tagpool.h +++ b/re2c/src/ir/tagpool.h @@ -2,6 +2,7 @@ #define _RE2C_IR_TAGPOOL_ #include "src/util/ord_hash_set.h" +#include "src/util/forbid_copy.h" namespace re2c { @@ -12,61 +13,20 @@ struct Tagpool private: ord_hash_set_t pool; + bool *buff; public: - explicit inline Tagpool(size_t n); - inline size_t insert(const bool *tags); - inline const bool *operator[](size_t idx); + explicit Tagpool(size_t n); + ~Tagpool(); + size_t insert(const bool *tags); + void orl(size_t *pt, size_t o); + void orl_with_mask(size_t *pt, size_t o, size_t m); + void andl(size_t *pt, size_t a); + void subst(size_t *pt, const std::vector &represent); + const bool *operator[](size_t idx); + FORBID_COPY(Tagpool); }; -Tagpool::Tagpool(size_t n) - : ntags(n) - , pool() -{ - const bool *zerotags = new bool[ntags](); - insert(zerotags); - delete[] zerotags; -} - -size_t Tagpool::insert(const bool *tags) -{ - return pool.insert(tags, ntags * sizeof(bool)); -} - -const bool *Tagpool::operator[](size_t idx) -{ - const bool *tags; - pool.deref(idx, tags); - return tags; -} - -inline void add_tags(bool *oldtags, const bool *newtags, size_t ntags) -{ - for (size_t i = 0; i < ntags; ++i) { - oldtags[i] |= newtags[i]; - } -} - -inline void add_tags_with_mask(bool *oldtags, const bool *newtags, - const bool *mask, size_t ntags) -{ - for (size_t i = 0; i < ntags; ++i) { - oldtags[i] |= newtags[i] & ~mask[i]; - } -} - -inline bool addcmp_tags_with_mask(bool *oldtags, const bool *newtags, - const bool *mask, size_t ntags) -{ - bool diff = false; - for (size_t i = 0; i < ntags; ++i) { - const bool old = oldtags[i]; - oldtags[i] |= newtags[i] & ~mask[i]; - diff |= old != oldtags[i]; - } - return diff; -} - } // namespace re2c #endif // _RE2C_IR_TAGPOOL_ -- 2.40.0