From 5003f00ab63080a46e35df755a2c23fef3bb6fc5 Mon Sep 17 00:00:00 2001 From: Ulya Trofimovich Date: Tue, 27 Sep 2016 13:43:23 +0100 Subject: [PATCH] Ported 'Tagpool' on 'lookup_t' and got rid of 'ord_hash_set_t'. An effort to reduce entities with similar functionality: 'lookup_t' is more flexible than 'ord_hash_set_t'. --- re2c/Makefile.am | 2 - re2c/src/ir/dfa/dfa.h | 1 - re2c/src/ir/tagpool.cc | 33 ++++++++++++----- re2c/src/ir/tagpool.h | 19 +++++++++- re2c/src/util/ord_hash_set.cc | 70 ----------------------------------- re2c/src/util/ord_hash_set.h | 54 --------------------------- 6 files changed, 40 insertions(+), 139 deletions(-) delete mode 100644 re2c/src/util/ord_hash_set.cc delete mode 100644 re2c/src/util/ord_hash_set.h diff --git a/re2c/Makefile.am b/re2c/Makefile.am index 480320aa..1a5cbbd0 100644 --- a/re2c/Makefile.am +++ b/re2c/Makefile.am @@ -62,7 +62,6 @@ SRC_HDR = \ src/util/hash32.h \ src/util/local_increment.h \ src/util/lookup.h \ - src/util/ord_hash_set.h \ src/util/range.h \ src/util/s_to_n32_unsafe.h \ src/util/smart_ptr.h \ @@ -124,7 +123,6 @@ SRC = \ src/parse/input.cc \ src/parse/scanner.cc \ src/parse/unescape.cc \ - src/util/ord_hash_set.cc \ src/util/s_to_n32_unsafe.cc \ src/util/range.cc re2c_SOURCES = \ diff --git a/re2c/src/ir/dfa/dfa.h b/re2c/src/ir/dfa/dfa.h index f5c833f3..a0eda8d4 100644 --- a/re2c/src/ir/dfa/dfa.h +++ b/re2c/src/ir/dfa/dfa.h @@ -10,7 +10,6 @@ #include "src/ir/rule.h" #include "src/ir/tagpool.h" #include "src/util/forbid_copy.h" -#include "src/util/ord_hash_set.h" namespace re2c { diff --git a/re2c/src/ir/tagpool.cc b/re2c/src/ir/tagpool.cc index 9b24b912..fbebe8c6 100644 --- a/re2c/src/ir/tagpool.cc +++ b/re2c/src/ir/tagpool.cc @@ -1,12 +1,12 @@ #include "src/ir/tagpool.h" -#include "src/util/forbid_copy.h" +#include "src/util/hash32.h" namespace re2c { Tagpool::Tagpool(size_t n) : ntags(n) - , pool() + , lookup() , buff(new bool[ntags]()) { // all-no tag set must have number 0 @@ -16,11 +16,31 @@ Tagpool::Tagpool(size_t n) Tagpool::~Tagpool() { delete[] buff; + const size_t n = lookup.size(); + for (size_t i = 0; i < n; ++i) { + free(const_cast(lookup[i])); + } } size_t Tagpool::insert(const bool *tags) { - return pool.insert(tags, ntags * sizeof(bool)); + const size_t size = ntags * sizeof(bool); + const uint32_t hash = hash32(0, tags, size); + + eqtag_t eq(ntags); + const size_t idx = lookup.find_with(hash, tags, eq); + if (idx != taglookup_t::NIL) { + return idx; + } + + bool *copy = static_cast(malloc(size)); + memcpy(copy, tags, size); + return lookup.push(hash, copy); +} + +const bool *Tagpool::operator[](size_t idx) +{ + return lookup[idx]; } size_t Tagpool::orl(size_t t, size_t o) @@ -83,11 +103,4 @@ size_t Tagpool::subst(size_t t, const size_t *represent) return insert(buff); } -const bool *Tagpool::operator[](size_t idx) -{ - const bool *tags; - pool.deref(idx, tags); - return tags; -} - } // namespace re2c diff --git a/re2c/src/ir/tagpool.h b/re2c/src/ir/tagpool.h index fc9de8fc..2dd12b9b 100644 --- a/re2c/src/ir/tagpool.h +++ b/re2c/src/ir/tagpool.h @@ -1,18 +1,33 @@ #ifndef _RE2C_IR_TAGPOOL_ #define _RE2C_IR_TAGPOOL_ -#include "src/util/ord_hash_set.h" +#include // malloc +#include // memcpy, memcmp + +#include "src/util/lookup.h" #include "src/util/forbid_copy.h" namespace re2c { +struct eqtag_t +{ + size_t ntags; + + explicit eqtag_t(size_t n): ntags(n) {} + inline bool operator()(const bool *x, const bool *y) + { + return memcmp(x, y, ntags * sizeof(bool)) == 0; + } +}; + struct Tagpool { const size_t ntags; private: - ord_hash_set_t pool; + typedef lookup_t taglookup_t; + taglookup_t lookup; bool *buff; public: diff --git a/re2c/src/util/ord_hash_set.cc b/re2c/src/util/ord_hash_set.cc deleted file mode 100644 index 58e30a5d..00000000 --- a/re2c/src/util/ord_hash_set.cc +++ /dev/null @@ -1,70 +0,0 @@ -#include - -#include "src/util/ord_hash_set.h" - -namespace re2c -{ - -ord_hash_set_t::hash_t ord_hash_set_t::hash(const void *data, size_t size) -{ - const uint8_t *bytes = static_cast(data); - hash_t h = size; // seed - for (size_t i = 0; i < size; ++i) - { - h = h ^ ((h << 5) + (h >> 2) + bytes[i]); - } - return h; -} - -ord_hash_set_t::elem_t* ord_hash_set_t::make_elem( - elem_t *next, - size_t index, - size_t size, - const void *data) -{ - elem_t *e = static_cast(malloc(offsetof(elem_t, data) + size)); - e->next = next; - e->index = index; - e->size = size; - memcpy(e->data, data, size); - return e; -} - -ord_hash_set_t::ord_hash_set_t() - : elems() - , lookup() -{} - -ord_hash_set_t::~ord_hash_set_t() -{ - std::for_each(elems.begin(), elems.end(), free); -} - -size_t ord_hash_set_t::size() const -{ - return elems.size(); -} - -size_t ord_hash_set_t::insert(const void *data, size_t size) -{ - const hash_t h = hash(data, size); - - std::map::const_iterator i = lookup.find(h); - if (i != lookup.end()) - { - for (elem_t *e = i->second; e; e = e->next) - { - if (e->size == size - && memcmp(e->data, data, size) == 0) - { - return e->index; - } - } - } - - const size_t index = elems.size(); - elems.push_back(lookup[h] = make_elem(lookup[h], index, size, data)); - return index; -} - -} // namespace re2c diff --git a/re2c/src/util/ord_hash_set.h b/re2c/src/util/ord_hash_set.h deleted file mode 100644 index afa1c69b..00000000 --- a/re2c/src/util/ord_hash_set.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _RE2C_UTIL_ORD_HASH_SET_ -#define _RE2C_UTIL_ORD_HASH_SET_ - -#include "src/util/c99_stdint.h" -#include // malloc, free -#include // memcpy -#include // for_each -#include -#include - -namespace re2c -{ - -/* - * ordered hash set: - * - access element by index: O(1) - * - insert element (find existing or add new): O(log(n)) - * - */ -class ord_hash_set_t -{ - struct elem_t - { - elem_t *next; - size_t index; - size_t size; - char data[1]; // inlined array of variable length - }; - typedef size_t hash_t; - - std::vector elems; - std::map lookup; - - static hash_t hash(const void *data, size_t size); - elem_t *make_elem(elem_t *next, size_t index, size_t size, const void *data); - -public: - ord_hash_set_t(); - ~ord_hash_set_t(); - size_t size() const; - size_t insert(const void *data, size_t size); - template size_t deref(size_t i, data_t *&data); -}; - -template size_t ord_hash_set_t::deref(size_t i, data_t *&data) -{ - elem_t *e = elems[i]; - data = reinterpret_cast(e->data); - return e->size / sizeof(data_t); -} - -} // namespace re2c - -#endif // _RE2C_UTIL_ORD_HASH_SET_ -- 2.40.0