From: Ulya Trofimovich Date: Sat, 19 Nov 2016 23:01:44 +0000 (+0000) Subject: Use different datatypes for closures and kernels. X-Git-Tag: 1.0~39^2~222 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d165004d92cdc2e5e93c5eb5e9e511fc47f269b9;p=re2c Use different datatypes for closures and kernels. This is a preliminary step for tracking tag versions in closures / kernels. For now closures and kernels store the same information, but they will diverge when we start tracking tag versions: closure items will need some extra data that is needed during closure construction, but shouldn't be in kernel. Kernel representation should also allow efficient comparison for identity or compatibility (for mapping). --- diff --git a/re2c/src/ir/dfa/closure.cc b/re2c/src/ir/dfa/closure.cc index 63b2a36c..2ef786ae 100644 --- a/re2c/src/ir/dfa/closure.cc +++ b/re2c/src/ir/dfa/closure.cc @@ -10,6 +10,7 @@ static void closure_one(closure_t &clos, Tagpool &tagpool, nfa_state_t *n, tagve static void check_tags(const Tagpool &tagpool, size_t oldidx, size_t newidx, bool *badtags); static bool compare_by_rule(const clos_t &c1, const clos_t &c2); static void prune_final_items(closure_t &clos, std::valarray &rules); +static bool not_fin(const clos_t &c); static tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, const std::valarray &rules, bool *badtags); tagsave_t *closure(const closure_t &clos1, closure_t &clos2, @@ -156,7 +157,7 @@ void prune_final_items(closure_t &clos, std::valarray &rules) clositer_t b = clos.begin(), e = clos.end(), - f = std::partition(b, e, clos_t::not_final); + f = std::partition(b, e, not_fin); if (f != e) { std::partial_sort(f, f, e, compare_by_rule); // mark all rules except the first one as shadowed @@ -169,6 +170,11 @@ void prune_final_items(closure_t &clos, std::valarray &rules) } } +bool not_fin(const clos_t &c) +{ + return c.state->type != nfa_state_t::FIN; +} + // WARNING: this function assumes that 
closure items are grouped bu rule tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, const std::valarray &rules, bool *badtags) diff --git a/re2c/src/ir/dfa/closure.h b/re2c/src/ir/dfa/closure.h index 423613da..d954bb45 100644 --- a/re2c/src/ir/dfa/closure.h +++ b/re2c/src/ir/dfa/closure.h @@ -16,8 +16,6 @@ struct clos_t inline clos_t(); inline clos_t(nfa_state_t *s, size_t i); - static inline bool final(const clos_t &c); - static inline bool not_final(const clos_t &c); }; typedef std::vector closure_t; @@ -38,16 +36,6 @@ clos_t::clos_t(nfa_state_t *s, size_t i) , tagidx(i) {} -bool clos_t::final(const clos_t &c) -{ - return c.state->type == nfa_state_t::FIN; -} - -bool clos_t::not_final(const clos_t &c) -{ - return !clos_t::final(c); -} - } // namespace re2c #endif // _RE2C_IR_DFA_CLOSURE_ diff --git a/re2c/src/ir/dfa/determinization.cc b/re2c/src/ir/dfa/determinization.cc index a145ca15..6f81c4e1 100644 --- a/re2c/src/ir/dfa/determinization.cc +++ b/re2c/src/ir/dfa/determinization.cc @@ -1,4 +1,3 @@ -#include #include #include @@ -16,7 +15,7 @@ namespace re2c static tagver_t vartag_maxver(const std::valarray &tags); static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol); -static void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol); +static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol); static void warn_bad_tags(const bool *badtags, const std::valarray &tags, const std::valarray &rules, const std::string &cond); @@ -35,15 +34,13 @@ nfa_state_t *transition(nfa_state_t *state, uint32_t symbol) return NULL; } -void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol) +void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol) { - clos2.clear(); - for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) { - nfa_state_t - *s1 = c->state, - *s2 = transition(s1, symbol); - if (s2) { - clos2.push_back(clos_t(s2, c->tagidx)); + clos.clear(); + for (size_t i 
= 0; i < kernel->size; ++i) { + nfa_state_t *s = transition(kernel->state[i], symbol); + if (s) { + clos.push_back(clos_t(s, kernel->tlook[i])); } } } @@ -60,18 +57,18 @@ dfa_t::dfa_t(const nfa_t &nfa, { const size_t ntag = tags.size(); Tagpool tagpool(ntag); - clospool_t clospool; + kernels_t kernels; closure_t clos1, clos2; bool *badtags = new bool[ntag](); maxtagver = vartag_maxver(tags); clos1.push_back(clos_t(nfa.root, ZERO_TAGS)); closure(clos1, clos2, tagpool, tcpool, rules, badtags); - clospool.insert(clos2); + kernels.insert(clos2); - // closures are in sync with DFA states - for (size_t i = 0; i < clospool.size(); ++i) { - const closure_t &clos0 = clospool[i]; + // closure kernels are in sync with DFA states + for (size_t i = 0; i < kernels.size(); ++i) { + const kernel_t *kernel = kernels[i]; // create new DFA state dfa_state_t *s = new dfa_state_t(nchars); @@ -79,20 +76,22 @@ dfa_t::dfa_t(const nfa_t &nfa, // check if the new state is final // see note [at most one final item per closure] - cclositer_t e = clos0.end(), - f = std::find_if(clos0.begin(), e, clos_t::final); - if (f != e) { - s->rule = f->state->rule; - s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[f->tagidx], rules[s->rule].tags, ntag); + for (size_t i = 0; i < kernel->size; ++i) { + const nfa_state_t *f = kernel->state[i]; + if (f->type == nfa_state_t::FIN) { + s->rule = f->rule; + s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], rules[s->rule].tags, ntag); + break; + } } // for each alphabet symbol, build tagged epsilon-closure // of all NFA states reachable on that symbol, then try to // find identical closure or add the new one for (size_t c = 0; c < nchars; ++c) { - reach(clos0, clos1, charset[c]); + reach(kernel, clos1, charset[c]); s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, badtags); - s->arcs[c] = clospool.insert(clos2); + s->arcs[c] = kernels.insert(clos2); } } diff --git a/re2c/src/ir/dfa/find_state.cc b/re2c/src/ir/dfa/find_state.cc index 
fe762156..3bfd878f 100644 --- a/re2c/src/ir/dfa/find_state.cc +++ b/re2c/src/ir/dfa/find_state.cc @@ -6,71 +6,96 @@ namespace re2c { -static uint32_t hashclos(const closure_t &clos); -static bool eqclos(const closure_t *clos1, const closure_t *clos2); +kernel_t::kernel_t(size_t n) + : size(n) + , state(new nfa_state_t*[size]) + , tlook(new size_t[size]) +{} -uint32_t hashclos(const closure_t &clos) +kernel_t *kernel_t::copy(const kernel_t &k) { - uint32_t h = static_cast(clos.size()); // seed - for (cclositer_t c = clos.begin(); c != clos.end(); ++c) { - h = hash32(h, &c->state, sizeof(c->state)); - h = hash32(h, &c->tagidx, sizeof(c->tagidx)); - } - return h; + const size_t n = k.size; + kernel_t *kcopy = new kernel_t(n); + memcpy(kcopy->state, k.state, n * sizeof(void*)); + memcpy(kcopy->tlook, k.tlook, n * sizeof(size_t)); + return kcopy; } -bool eqclos(const closure_t *clos1, const closure_t *clos2) +kernel_t::~kernel_t() { - if (clos1->size() != clos2->size()) { - return false; - } - for (cclositer_t c1 = clos1->begin(), c2 = clos2->begin(); - c1 != clos1->end(); ++c1, ++c2) { - if (c1->state != c2->state - || c1->tagidx != c2->tagidx) { - return false; - } - } - return true; + delete[] state; + delete[] tlook; } -clospool_t::clospool_t(): lookup() {} +struct kernel_eq_t +{ + bool operator()(const kernel_t *x, const kernel_t *y) const + { + return x->size == y->size + && memcmp(x->state, y->state, x->size * sizeof(void*)) == 0 + && memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0; + } +}; + +kernels_t::kernels_t() + : lookup() + , maxsize(256) // usually ranges from one to some twenty + , buffer(new kernel_t(maxsize)) +{} -clospool_t::~clospool_t() +kernels_t::~kernels_t() { + delete buffer; + const size_t n = lookup.size(); for (size_t i = 0; i < n; ++i) { delete lookup[i]; } } -size_t clospool_t::size() const +size_t kernels_t::size() const { return lookup.size(); } -const closure_t& clospool_t::operator[](size_t idx) const +const kernel_t 
*kernels_t::operator[](size_t idx) const { - return *lookup[idx]; + return lookup[idx]; } -size_t clospool_t::insert(const closure_t &clos) +size_t kernels_t::insert(const closure_t &clos) { + const size_t nkern = clos.size(); + // empty closure corresponds to default state - if (clos.empty()) { - return dfa_t::NIL; - } + if (nkern == 0) return dfa_t::NIL; - const uint32_t hash = hashclos(clos); + // resize buffer if closure is too large + if (maxsize < nkern) { + maxsize = nkern * 2; // in advance + delete buffer; + buffer = new kernel_t(maxsize); + } - // try to find an identical DFA state - size_t idx = lookup.find_with(hash, &clos, eqclos); - if (idx != closlookup_t::NIL) { - return idx; + // copy closure to buffer kernel + buffer->size = nkern; + for (size_t i = 0; i < nkern; ++i) { + const clos_t &c = clos[i]; + buffer->state[i] = c.state; + buffer->tlook[i] = c.tagidx; } - // otherwise add a new state - return lookup.push(hash, new closure_t(clos)); + // get kernel hash + uint32_t hash = static_cast(nkern); // seed + hash = hash32(hash, buffer->state, nkern * sizeof(void*)); + hash = hash32(hash, buffer->tlook, nkern * sizeof(size_t)); + + // try to find identical kernel + size_t idx = lookup.find_with(hash, buffer, kernel_eq_t()); + if (idx != index_t::NIL) return idx; + + // otherwise add new kernel + return lookup.push(hash, kernel_t::copy(*buffer)); } } // namespace re2c diff --git a/re2c/src/ir/dfa/find_state.h b/re2c/src/ir/dfa/find_state.h index a68061ee..931f9e98 100644 --- a/re2c/src/ir/dfa/find_state.h +++ b/re2c/src/ir/dfa/find_state.h @@ -2,25 +2,40 @@ #define _RE2C_IR_DFA_FIND_STATE_ #include "src/ir/dfa/closure.h" +#include "src/util/forbid_copy.h" #include "src/util/lookup.h" namespace re2c { -struct Tagpool; +struct kernel_t +{ + size_t size; + nfa_state_t **state; + size_t *tlook; + + explicit kernel_t(size_t n); + ~kernel_t(); + static kernel_t *copy(const kernel_t &k); + FORBID_COPY(kernel_t); +}; -struct clospool_t +struct kernels_t { 
private: - typedef lookup_t closlookup_t; - closlookup_t lookup; + typedef lookup_t index_t; + + index_t lookup; + size_t maxsize; + kernel_t *buffer; public: - clospool_t(); - ~clospool_t(); + kernels_t(); + ~kernels_t(); size_t size() const; - const closure_t& operator[](size_t idx) const; + const kernel_t* operator[](size_t idx) const; size_t insert(const closure_t &clos); + FORBID_COPY(kernels_t); }; } // namespace re2c