This is a preliminary step for tracking tag versions in closures / kernels.
For now closures and kernels store the same information, but it will
diverge when we start tracking tag versions: closure items will need
some extra-data that is needed during closure construction, but shouldn't
be in kernel.
Kernel representation should also allow efficient comparison for identity
or compatibility (for mapping).
static void check_tags(const Tagpool &tagpool, size_t oldidx, size_t newidx, bool *badtags);
static bool compare_by_rule(const clos_t &c1, const clos_t &c2);
static void prune_final_items(closure_t &clos, std::valarray<Rule> &rules);
+static bool not_fin(const clos_t &c);
static tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool, tcpool_t &tcpool, const std::valarray<Rule> &rules, bool *badtags);
tagsave_t *closure(const closure_t &clos1, closure_t &clos2,
clositer_t
b = clos.begin(),
e = clos.end(),
- f = std::partition(b, e, clos_t::not_final);
+ f = std::partition(b, e, not_fin);
if (f != e) {
std::partial_sort(f, f, e, compare_by_rule);
// mark all rules except the first one as shadowed
}
}
+bool not_fin(const clos_t &c) // predicate handed to std::partition: true when the item's NFA state is not of type FIN
+{
+ return c.state->type != nfa_state_t::FIN;
+}
+
// WARNING: this function assumes that closure items are grouped bu rule
tagsave_t *merge_and_check_tags(const closure_t &clos, Tagpool &tagpool,
tcpool_t &tcpool, const std::valarray<Rule> &rules, bool *badtags)
inline clos_t();
inline clos_t(nfa_state_t *s, size_t i);
- static inline bool final(const clos_t &c);
- static inline bool not_final(const clos_t &c);
};
typedef std::vector<clos_t> closure_t;
, tagidx(i)
{}
-bool clos_t::final(const clos_t &c)
-{
- return c.state->type == nfa_state_t::FIN;
-}
-
-bool clos_t::not_final(const clos_t &c)
-{
- return !clos_t::final(c);
-}
-
} // namespace re2c
#endif // _RE2C_IR_DFA_CLOSURE_
-#include <algorithm>
#include <limits>
#include <vector>
static tagver_t vartag_maxver(const std::valarray<Tag> &tags);
static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol);
-static void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol);
+static void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol);
static void warn_bad_tags(const bool *badtags, const std::valarray<Tag> &tags,
const std::valarray<Rule> &rules, const std::string &cond);
return NULL;
}
-void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol)
+void reach(const kernel_t *kernel, closure_t &clos, uint32_t symbol) // rebuilds clos from the kernel items that have a transition on symbol
{
- clos2.clear();
- for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
- nfa_state_t
- *s1 = c->state,
- *s2 = transition(s1, symbol);
- if (s2) {
- clos2.push_back(clos_t(s2, c->tagidx));
+ clos.clear(); // drop whatever the caller left in the output closure
+ for (size_t i = 0; i < kernel->size; ++i) {
+ nfa_state_t *s = transition(kernel->state[i], symbol);
+ if (s) { // items for which transition() returns null are skipped
+ clos.push_back(clos_t(s, kernel->tlook[i])); // the new item takes the kernel item's tlook value as its tag index
}
}
}
{
const size_t ntag = tags.size();
Tagpool tagpool(ntag);
- clospool_t clospool;
+ kernels_t kernels;
closure_t clos1, clos2;
bool *badtags = new bool[ntag]();
maxtagver = vartag_maxver(tags);
clos1.push_back(clos_t(nfa.root, ZERO_TAGS));
closure(clos1, clos2, tagpool, tcpool, rules, badtags);
- clospool.insert(clos2);
+ kernels.insert(clos2);
- // closures are in sync with DFA states
- for (size_t i = 0; i < clospool.size(); ++i) {
- const closure_t &clos0 = clospool[i];
+ // closure kernels are in sync with DFA states
+ for (size_t i = 0; i < kernels.size(); ++i) {
+ const kernel_t *kernel = kernels[i];
// create new DFA state
dfa_state_t *s = new dfa_state_t(nchars);
// check if the new state is final
// see note [at most one final item per closure]
- cclositer_t e = clos0.end(),
- f = std::find_if(clos0.begin(), e, clos_t::final);
- if (f != e) {
- s->rule = f->state->rule;
- s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[f->tagidx], rules[s->rule].tags, ntag);
+ for (size_t i = 0; i < kernel->size; ++i) {
+ const nfa_state_t *f = kernel->state[i];
+ if (f->type == nfa_state_t::FIN) {
+ s->rule = f->rule;
+ s->tcmd[nchars] = tcpool.conv_to_tcmd(tagpool[kernel->tlook[i]], rules[s->rule].tags, ntag);
+ break;
+ }
}
// for each alphabet symbol, build tagged epsilon-closure
// of all NFA states reachable on that symbol, then try to
// find identical closure or add the new one
for (size_t c = 0; c < nchars; ++c) {
- reach(clos0, clos1, charset[c]);
+ reach(kernel, clos1, charset[c]);
s->tcmd[c].save = closure(clos1, clos2, tagpool, tcpool, rules, badtags);
- s->arcs[c] = clospool.insert(clos2);
+ s->arcs[c] = kernels.insert(clos2);
}
}
namespace re2c
{
-static uint32_t hashclos(const closure_t &clos);
-static bool eqclos(const closure_t *clos1, const closure_t *clos2);
+kernel_t::kernel_t(size_t n) // allocates both arrays with n elements each; their contents are left uninitialized
+ : size(n)
+ , state(new nfa_state_t*[size]) // reads the member 'size', so 'size' must be declared before 'state' in the struct
+ , tlook(new size_t[size])
+{}
-uint32_t hashclos(const closure_t &clos)
+kernel_t *kernel_t::copy(const kernel_t &k) // returns a newly allocated kernel holding the first k.size entries of k
{
- uint32_t h = static_cast<uint32_t>(clos.size()); // seed
- for (cclositer_t c = clos.begin(); c != clos.end(); ++c) {
- h = hash32(h, &c->state, sizeof(c->state));
- h = hash32(h, &c->tagidx, sizeof(c->tagidx));
- }
- return h;
+ const size_t n = k.size;
+ kernel_t *kcopy = new kernel_t(n); // arrays are sized exactly n, regardless of how much k had allocated
+ memcpy(kcopy->state, k.state, n * sizeof(void*)); // NOTE(review): sizeof(void*) stands in for sizeof(nfa_state_t*)
+ memcpy(kcopy->tlook, k.tlook, n * sizeof(size_t));
+ return kcopy; // allocated with new; the caller is responsible for deleting it
}
-bool eqclos(const closure_t *clos1, const closure_t *clos2)
+kernel_t::~kernel_t() // releases the two arrays allocated by the constructor
{
- if (clos1->size() != clos2->size()) {
- return false;
- }
- for (cclositer_t c1 = clos1->begin(), c2 = clos2->begin();
- c1 != clos1->end(); ++c1, ++c2) {
- if (c1->state != c2->state
- || c1->tagidx != c2->tagidx) {
- return false;
- }
- }
- return true;
+ delete[] state;
+ delete[] tlook;
}
-clospool_t::clospool_t(): lookup() {}
+struct kernel_eq_t // equality functor that kernels_t::insert passes to lookup.find_with
+{
+ bool operator()(const kernel_t *x, const kernel_t *y) const
+ {
+ return x->size == y->size // checked first, so x->size is a valid length for y's arrays below
+ && memcmp(x->state, y->state, x->size * sizeof(void*)) == 0 // same NFA state pointers in the same order
+ && memcmp(x->tlook, y->tlook, x->size * sizeof(size_t)) == 0; // same tlook values in the same order
+ }
+};
+
+kernels_t::kernels_t() // starts with an empty lookup and a scratch kernel allocated for maxsize items
+ : lookup()
+ , maxsize(256) // usually ranges from one to some twenty
+ , buffer(new kernel_t(maxsize)) // reads the member 'maxsize', so 'maxsize' must be declared before 'buffer' in the struct
+{}
-clospool_t::~clospool_t()
+kernels_t::~kernels_t() // frees the scratch buffer, then every kernel stored in lookup
{
+ delete buffer; // the scratch kernel is never stored in lookup (insert pushes a copy), so it is freed separately
+
const size_t n = lookup.size();
for (size_t i = 0; i < n; ++i) {
delete lookup[i];
}
}
-size_t clospool_t::size() const
+size_t kernels_t::size() const // number of entries currently held by lookup
{
return lookup.size();
}
-const closure_t& clospool_t::operator[](size_t idx) const
+const kernel_t *kernels_t::operator[](size_t idx) const // returns the stored kernel pointer at idx; idx is forwarded to lookup as is
{
- return *lookup[idx];
+ return lookup[idx];
}
-size_t clospool_t::insert(const closure_t &clos)
+size_t kernels_t::insert(const closure_t &clos) // returns the index of the kernel equal to clos, adding one if absent; dfa_t::NIL for an empty closure
{
+ const size_t nkern = clos.size();
+
// empty closure corresponds to default state
- if (clos.empty()) {
- return dfa_t::NIL;
- }
+ if (nkern == 0) return dfa_t::NIL;
- const uint32_t hash = hashclos(clos);
+ // resize buffer if closure is too large
+ if (maxsize < nkern) {
+ maxsize = nkern * 2; // in advance
+ delete buffer; // old contents are not needed: the buffer is refilled from clos below
+ buffer = new kernel_t(maxsize);
+ }
- // try to find an identical DFA state
- size_t idx = lookup.find_with(hash, &clos, eqclos);
- if (idx != closlookup_t::NIL) {
- return idx;
+ // copy closure to buffer kernel
+ buffer->size = nkern; // number of items in use; the allocated capacity is tracked by maxsize
+ for (size_t i = 0; i < nkern; ++i) {
+ const clos_t &c = clos[i];
+ buffer->state[i] = c.state;
+ buffer->tlook[i] = c.tagidx;
}
- // otherwise add a new state
- return lookup.push(hash, new closure_t(clos));
+ // get kernel hash
+ uint32_t hash = static_cast<uint32_t>(nkern); // seed
+ hash = hash32(hash, buffer->state, nkern * sizeof(void*)); // hashes only the nkern entries in use
+ hash = hash32(hash, buffer->tlook, nkern * sizeof(size_t));
+
+ // try to find identical kernel
+ size_t idx = lookup.find_with(hash, buffer, kernel_eq_t());
+ if (idx != index_t::NIL) return idx;
+
+ // otherwise add new kernel
+ return lookup.push(hash, kernel_t::copy(*buffer)); // stores a copy sized to nkern; buffer stays owned by this object
}
} // namespace re2c
#define _RE2C_IR_DFA_FIND_STATE_
#include "src/ir/dfa/closure.h"
+#include "src/util/forbid_copy.h"
#include "src/util/lookup.h"
namespace re2c
{
-struct Tagpool;
+struct kernel_t // parallel arrays: item i is the pair (state[i], tlook[i])
+{
+ size_t size; // number of items; the constructor sets it to the allocated length
+ nfa_state_t **state; // array of NFA state pointers, allocated by the constructor
+ size_t *tlook; // array of size_t values, one per state, allocated by the constructor
+
+ explicit kernel_t(size_t n);
+ ~kernel_t();
+ static kernel_t *copy(const kernel_t &k); // explicit copying entry point, since FORBID_COPY is applied below
+ FORBID_COPY(kernel_t);
+};
-struct clospool_t
+struct kernels_t // indexed collection of kernels; see insert() for how entries are added
{
private:
- typedef lookup_t<const closure_t*> closlookup_t;
- closlookup_t lookup;
+ typedef lookup_t<const kernel_t*> index_t;
+
+ index_t lookup; // holds pointers to the stored kernels
+ size_t maxsize; // length the arrays of 'buffer' were allocated with
+ kernel_t *buffer; // separately allocated kernel, distinct from the entries in lookup
public:
- clospool_t();
- ~clospool_t();
+ kernels_t();
+ ~kernels_t();
size_t size() const;
- const closure_t& operator[](size_t idx) const;
+ const kernel_t* operator[](size_t idx) const;
size_t insert(const closure_t &clos);
+ FORBID_COPY(kernels_t);
};
} // namespace re2c