From: Ulya Trofimovich Date: Wed, 8 May 2019 08:57:59 +0000 (+0100) Subject: Hash in 4-byte chunks to speedup determinization on large automata. X-Git-Tag: 1.2~75 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22b975cba77a3d15d0d7121ea03e5a71f1a98128;p=re2c Hash in 4-byte chunks to speedup determinization on large automata. --- diff --git a/src/util/hash32.h b/src/util/hash32.h index 9a031415..2d57c8c0 100644 --- a/src/util/hash32.h +++ b/src/util/hash32.h @@ -7,12 +7,36 @@ namespace re2c { +static inline uint32_t hash4(uint32_t h, uint32_t k) +{ + return h ^ ((h << 5) + (h >> 2) + k); +} + +// hash in 4-byte chunks for speed inline uint32_t hash32(uint32_t h, const void *data, size_t size) { - const uint8_t *bytes = static_cast(data); - for (size_t i = 0; i < size; ++i) { - h = h ^ ((h << 5) + (h >> 2) + bytes[i]); + static const uintptr_t ALIGN = sizeof(uint32_t); + static const uintptr_t MASK = ~(ALIGN - 1); + + uintptr_t p = reinterpret_cast(data); + const uintptr_t e = p + size; + + // first aligned address in range + const uintptr_t p1 = (p + ALIGN - 1) & MASK; + + // last aligned address in range + const uintptr_t p2 = e & MASK; + + for (; p < p1; ++p) { + h = hash4(h, *reinterpret_cast(p)); + } + for (; p < p2; p += 4) { + h = hash4(h, *reinterpret_cast(p)); } + for (; p < e; ++p) { + h = hash4(h, *reinterpret_cast(p)); + } + return h; }