src/util/counter.h \
src/util/forbid_copy.h \
src/util/free_list.h \
+ src/util/hash32.h \
src/util/intersect_sorted.h \
src/util/local_increment.h \
+ src/util/lookup.h \
src/util/ord_hash_set.h \
src/util/range.h \
src/util/s_to_n32_unsafe.h \
namespace re2c
{
+static void closure_one(closure_t &clos, nfa_state_t *n, bool *tags, bool *badtags, size_t ntags);
static void merge_tags(bool *oldtags, const bool *newtags, bool *badtags, size_t ntags);
+void closure(const closure_t &clos1, closure_t &clos2, bool *tags, bool *badtags, size_t ntags)
+{
+ clos2.clear();
+ for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
+ closure_one(clos2, c->state, tags, badtags, ntags);
+ }
+}
+
void merge_tags(bool *oldtags, const bool *newtags,
bool *badtags, size_t ntags)
{
* resulting tag sets: if they differ, then NFA is tagwise
* ambiguous. All tags are merged together; ambiguity is reported.
*/
-void closure(kitem_t *const kernel, kitem_t *&kend,
- nfa_state_t *n, bool *tags, bool *badtags, size_t ntags)
+void closure_one(closure_t &clos, nfa_state_t *n, bool *tags, bool *badtags, size_t ntags)
{
// trace the first iteration of each loop:
// epsilon-loops may add ney tags and reveal conflicts
++n->loop;
switch (n->type) {
case nfa_state_t::ALT:
- closure(kernel, kend, n->alt.out1, tags, badtags, ntags);
- closure(kernel, kend, n->alt.out2, tags, badtags, ntags);
+ closure_one(clos, n->alt.out1, tags, badtags, ntags);
+ closure_one(clos, n->alt.out2, tags, badtags, ntags);
break;
case nfa_state_t::TAG: {
const size_t t = n->tag.info;
const bool old = tags[t];
tags[t] = true;
- closure(kernel, kend, n->tag.out, tags, badtags, ntags);
+ closure_one(clos, n->tag.out, tags, badtags, ntags);
tags[t] = old;
break;
}
case nfa_state_t::RAN:
case nfa_state_t::FIN: {
- kitem_t *k = kernel;
- while (k != kend && k->state != n) ++k;
- if (k == kend) {
- kend->state = n;
- kend->tagptr = new bool[ntags];
- memcpy(kend->tagptr, tags, ntags * sizeof(bool));
- ++kend;
+ clositer_t
+ c = clos.begin(),
+ e = clos.end();
+ for (; c != e && c->state != n; ++c);
+ if (c == e) {
+ bool *tagptr = new bool[ntags];
+ memcpy(tagptr, tags, ntags * sizeof(bool));
+ clos.push_back(clos_t(n, tagptr));
} else {
// it is impossible to reach the same NFA state from
// different rules, so no need to mess with masks here
- merge_tags(k->tagptr, tags, badtags, ntags);
+ merge_tags(c->tagptr, tags, badtags, ntags);
}
break;
}
namespace re2c
{
-struct kitem_t
+struct clos_t
{
nfa_state_t *state;
union
size_t tagidx;
};
- static bool compare(const kitem_t &k1, const kitem_t &k2)
- {
- return k1.state < k2.state
- || (k1.state == k2.state && k1.tagidx < k2.tagidx);
- }
+ inline clos_t();
+ inline clos_t(nfa_state_t *s, size_t i);
+ inline clos_t(nfa_state_t *s, bool *p);
+ static inline bool compare(const clos_t &c1, const clos_t &c2);
};
-void closure(kitem_t *const kernel, kitem_t *&kend, nfa_state_t *n,
- bool *tags, bool *badtags, size_t ntags);
+typedef std::vector<clos_t> closure_t;
+typedef closure_t::iterator clositer_t;
+typedef closure_t::const_iterator cclositer_t;
+
+void closure(const closure_t &clos1, closure_t &clos2, bool *tags, bool *badtags, size_t ntags);
+
+clos_t::clos_t()
+ : state(NULL)
+ , tagidx(0)
+{}
+
+clos_t::clos_t(nfa_state_t *s, size_t i)
+ : state(s)
+ , tagidx(i)
+{}
+
+clos_t::clos_t(nfa_state_t *s, bool *t)
+ : state(s)
+ , tagptr(t)
+{}
+
+bool clos_t::compare(const clos_t &c1, const clos_t &c2)
+{
+ const nfa_state_t
+ *s1 = c1.state,
+ *s2 = c2.state;
+ return s1 < s2
+ || (s1 == s2 && c1.tagidx < c2.tagidx);
+}
} // namespace re2c
{
static nfa_state_t *transition(nfa_state_t *state, uint32_t symbol);
-static void reach(const kitem_t *kernel, size_t kcount, kitem_t *&r, uint32_t symbol);
+static void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol);
const size_t dfa_t::NIL = std::numeric_limits<size_t>::max();
return NULL;
}
-void reach(const kitem_t *kernel, size_t kcount, kitem_t *&r, uint32_t symbol)
+void reach(const closure_t &clos1, closure_t &clos2, uint32_t symbol)
{
- for (size_t i = 0; i < kcount; ++i) {
+ clos2.clear();
+ for (cclositer_t c = clos1.begin(); c != clos1.end(); ++c) {
nfa_state_t
- *s = kernel[i].state,
- *t = transition(s, symbol);
- if (t) {
- r->state = t;
- r->tagidx = kernel[i].tagidx;
- ++r;
+ *s1 = c->state,
+ *s2 = transition(s1, symbol);
+ if (s2) {
+ clos2.push_back(clos_t(s2, c->tagidx));
}
}
}
, tags(*nfa.tags)
, tagpool(*nfa.tagpool)
{
+ clospool_t clospool;
+ closure_t clos1, clos2;
const size_t ntags = tags.size();
const size_t nrules = rules.size();
-
- ord_hash_set_t kernels;
- kitem_t *rstart = new kitem_t[nfa.size], *rend = rstart;
- kitem_t *kstart = new kitem_t[nfa.size], *kend = kstart;
bool *ktags = new bool[ntags]();
bool *badtags = new bool[ntags]();
bool *arctags = new bool[ntags];
bool *mask = new bool[ntags];
bool *fin = new bool[nrules];
- closure(kstart, kend, nfa.root, ktags, badtags, ntags);
- find_state(kstart, kend, kernels, tagpool);
- for (size_t i = 0; i < kernels.size(); ++i) {
+ clos1.push_back(clos_t(nfa.root, static_cast<size_t>(0)));
+ closure(clos1, clos2, ktags, badtags, ntags);
+ find_state(clos2, clospool, tagpool);
+
+ for (size_t i = 0; i < clospool.size(); ++i) {
+ const closure_t &clos0 = clospool[i];
dfa_state_t *s = new dfa_state_t(nchars);
states.push_back(s);
- const kitem_t *kernel;
- const size_t kcount = kernels.deref<const kitem_t>(i, kernel);
-
for (size_t c = 0; c < nchars; ++c) {
- rend = rstart;
- reach(kernel, kcount, rend, charset[c]);
-
- kend = kstart;
- for (const kitem_t *r = rstart; r != rend; ++r) {
- closure(kstart, kend, r->state, ktags, badtags, ntags);
- }
- s->arcs[c] = find_state(kstart, kend, kernels, tagpool);
+ reach(clos0, clos1, charset[c]);
+ closure(clos1, clos2, ktags, badtags, ntags);
+ s->arcs[c] = find_state(clos2, clospool, tagpool);
memset(arctags, 0, ntags * sizeof(bool));
memset(mask, 0, ntags * sizeof(bool));
- for (const kitem_t *r = rstart; r != rend; ++r) {
- merge_tags_with_mask(arctags, tagpool[r->tagidx], mask,
- tagpool[rules[r->state->rule].tags], badtags, ntags);
+ for (cclositer_t p = clos1.begin(); p != clos1.end(); ++p) {
+ merge_tags_with_mask(arctags, tagpool[p->tagidx], mask,
+ tagpool[rules[p->state->rule].tags], badtags, ntags);
}
s->tags[c] = tagpool.insert(arctags);
}
memset(fin, 0, nrules * sizeof(bool));
memset(arctags, 0, ntags * sizeof(bool));
memset(mask, 0, ntags * sizeof(bool));
- for (size_t j = 0; j < kcount; ++j) {
- nfa_state_t *n = kernel[j].state;
+ for (cclositer_t p = clos0.begin(); p != clos0.end(); ++p) {
+ nfa_state_t *n = p->state;
if (n->type == nfa_state_t::FIN) {
- merge_tags_with_mask(arctags, tagpool[kernel[j].tagidx], mask,
+ merge_tags_with_mask(arctags, tagpool[p->tagidx], mask,
tagpool[rules[n->rule].tags], badtags, ntags);
fin[n->rule] = true;
}
}
}
- delete[] rstart;
- delete[] kstart;
delete[] ktags;
delete[] badtags;
delete[] arctags;
#include <string.h>
#include "src/ir/dfa/find_state.h"
+#include "src/util/hash32.h"
namespace re2c
{
-size_t find_state(kitem_t *kernel, kitem_t *kend,
- ord_hash_set_t &kernels, Tagpool &tagpool)
+static uint32_t hashclos(const closure_t &clos);
+static bool eqclos(const closure_t *clos1, const closure_t *clos2);
+
+uint32_t hashclos(const closure_t &clos)
+{
+ uint32_t h = static_cast<uint32_t>(clos.size()); // seed
+ for (cclositer_t c = clos.begin(); c != clos.end(); ++c) {
+ h = hash32(h, &c->state, sizeof(c->state));
+ h = hash32(h, &c->tagidx, sizeof(c->tagidx));
+ }
+ return h;
+}
+
+bool eqclos(const closure_t *clos1, const closure_t *clos2)
+{
+ if (clos1->size() != clos2->size()) {
+ return false;
+ }
+ for (cclositer_t c1 = clos1->begin(), c2 = clos2->begin();
+ c1 != clos1->end(); ++c1, ++c2) {
+ if (c1->state != c2->state
+ || c1->tagidx != c2->tagidx) {
+ return false;
+ }
+ }
+ return true;
+}
+
+clospool_t::clospool_t(): lookup() {}
+
+clospool_t::~clospool_t()
+{
+ const size_t n = lookup.size();
+ for (size_t i = 0; i < n; ++i) {
+ delete lookup[i];
+ }
+}
+
+size_t clospool_t::size() const
{
- const size_t kcount = static_cast<size_t>(kend - kernel);
+ return lookup.size();
+}
- // zero-sized kernel corresponds to default state
- if (kcount == 0) {
+const closure_t& clospool_t::operator[](size_t idx) const
+{
+ return *lookup[idx];
+}
+
+size_t clospool_t::insert(const closure_t &clos)
+{
+ const uint32_t hash = hashclos(clos);
+
+ // try to find an identical DFA state
+ size_t idx = lookup.find_with(hash, &clos, eqclos);
+ if (idx != closlookup_t::NIL) {
+ return idx;
+ }
+
+ // otherwise add a new state
+ return lookup.push(hash, new closure_t(clos));
+}
+
+size_t find_state(closure_t &clos, clospool_t &clospool, Tagpool &tagpool)
+{
+ // empty closure corresponds to default state
+ if (clos.empty()) {
return dfa_t::NIL;
}
// dump tagsets to tagpool and address them by index:
- // this simpifies storing and comparing kernels
- for (kitem_t *k = kernel; k != kend; ++k) {
- bool *tags = k->tagptr;
- k->tagidx = tagpool.insert(tags);
+ // this simpifies storing and comparing closures
+ for (clositer_t c = clos.begin(); c != clos.end(); ++c) {
+ bool *tags = c->tagptr;
+ c->tagidx = tagpool.insert(tags);
delete[] tags;
}
- // sort kernel items to allow comparison by hash and 'memcmp'
- std::sort(kernel, kend, kitem_t::compare);
+ // sort closure to allow comparison by hash and 'memcmp'
+ std::sort(clos.begin(), clos.end(), clos_t::compare);
- return kernels.insert(kernel, kcount * sizeof(kitem_t));
+ return clospool.insert(clos);
}
} // namespace re2c
#define _RE2C_IR_DFA_FIND_STATE_
#include "src/ir/dfa/closure.h"
-#include "src/ir/tagpool.h"
-#include "src/util/ord_hash_set.h"
+#include "src/util/lookup.h"
namespace re2c
{
-size_t find_state(kitem_t *kernel, kitem_t *kend, ord_hash_set_t &kernels, Tagpool &tagpool);
+struct Tagpool;
+
+struct clospool_t
+{
+private:
+ typedef lookup_t<const closure_t*> closlookup_t;
+ closlookup_t lookup;
+
+public:
+ clospool_t();
+ ~clospool_t();
+ size_t size() const;
+ const closure_t& operator[](size_t idx) const;
+ size_t insert(const closure_t &clos);
+};
+
+size_t find_state(closure_t &clos, clospool_t &clospool, Tagpool &tagpool);
} // namespace re2c
--- /dev/null
+#ifndef _RE2C_UTIL_HASH32_
+#define _RE2C_UTIL_HASH32_
+
+#include "src/util/c99_stdint.h"
+
+namespace re2c
+{
+
+inline uint32_t hash32(uint32_t h, const void *data, size_t size)
+{
+ const uint8_t *bytes = static_cast<const uint8_t*>(data);
+ for (size_t i = 0; i < size; ++i) {
+ h = h ^ ((h << 5) + (h >> 2) + bytes[i]);
+ }
+ return h;
+}
+
+} // namespace re2c
+
+#endif // _RE2C_UTIL_HASH32_
--- /dev/null
+#ifndef _RE2C_UTIL_LOOKUP_
+#define _RE2C_UTIL_LOOKUP_
+
+#include "src/util/c99_stdint.h"
+#include <limits>
+#include <map>
+#include <vector>
+#include <stddef.h>
+
+namespace re2c
+{
+
+/*
+ * O(1) random access
+ * O(log(n)) insertion
+ */
+template<typename data_t, typename hash_t = uint32_t>
+struct lookup_t
+{
+ static const size_t NIL;
+
+private:
+ struct elem_t
+ {
+ size_t next;
+ data_t data;
+
+ elem_t(size_t n, const data_t &d)
+ : next(n), data(d) {}
+ };
+
+ std::vector<elem_t> elems;
+ std::map<hash_t, size_t> lookup;
+
+public:
+ lookup_t();
+ size_t size() const;
+ data_t& operator[](size_t idx);
+ const data_t& operator[](size_t idx) const;
+ size_t push(hash_t h, const data_t &data);
+ template<typename pred_t>
+ size_t find_with(hash_t h, const data_t &data, pred_t &pred) const;
+
+private:
+ size_t head(hash_t) const;
+};
+
+template<typename data_t, typename hash_t>
+const size_t lookup_t<data_t, hash_t>::NIL = std::numeric_limits<size_t>::max();
+
+template<typename data_t, typename hash_t>
+lookup_t<data_t, hash_t>::lookup_t()
+ : elems()
+ , lookup()
+{}
+
+template<typename data_t, typename hash_t>
+size_t lookup_t<data_t, hash_t>::size() const
+{
+ return elems.size();
+}
+
+template<typename data_t, typename hash_t>
+data_t& lookup_t<data_t, hash_t>::operator[](size_t idx)
+{
+ return elems[idx].data;
+}
+
+template<typename data_t, typename hash_t>
+const data_t& lookup_t<data_t, hash_t>::operator[](size_t idx) const
+{
+ return elems[idx].data;
+}
+
+template<typename data_t, typename hash_t>
+size_t lookup_t<data_t, hash_t>::head(hash_t h) const
+{
+ typename std::map<hash_t, size_t>::const_iterator x = lookup.find(h);
+ return x == lookup.end() ? NIL : x->second;
+}
+
+template<typename data_t, typename hash_t>
+size_t lookup_t<data_t, hash_t>::push(hash_t h, const data_t &data)
+{
+ const size_t idx = elems.size();
+ elems.push_back(elem_t(head(h), data));
+ lookup[h] = idx;
+ return idx;
+}
+
+template<typename data_t, typename hash_t>
+template<typename pred_t>
+size_t lookup_t<data_t, hash_t>::find_with(hash_t h, const data_t &data, pred_t &pred) const
+{
+ for (size_t i = head(h); i != NIL;) {
+ const elem_t &e = elems[i];
+ if (pred(e.data, data)) {
+ return i;
+ }
+ i = e.next;
+ }
+ return NIL;
+}
+
+} // namespace re2c
+
+#endif // _RE2C_UTIL_LOOKUP_