From: Andi Gutmans Date: Sat, 8 Jun 2002 12:44:39 +0000 (+0000) Subject: - Add a loop unrolled version of the hash function and a bit of an X-Git-Tag: php5_5_0~130 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1186d51c3a85041a3a680e1a750675ebeab4be07;p=php - Add a loop unrolled version of the hash function and a bit of an - explanation about our hash function (Ralf S. Engelschall) --- diff --git a/Zend/zend_hash.h b/Zend/zend_hash.h index c28b38d46b..479d69a649 100644 --- a/Zend/zend_hash.h +++ b/Zend/zend_hash.h @@ -188,18 +188,68 @@ ZEND_API int zend_hash_num_elements(HashTable *ht); ZEND_API int zend_hash_rehash(HashTable *ht); +/* + * DJBX33A (Daniel J. Bernstein, Times 33 with Addition) + * + * This is Daniel J. Bernstein's popular `times 33' hash function as + * posted by him years ago on comp.lang.c. It basically uses a function + * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best + * known hash functions for strings, because it is both computed very + * fast and distributes very well. + * + * The magic of number 33, i.e. why it works better than many other + * constants, prime or not, has never been adequately explained by + * anyone. So I try an explanation: if one experimentally tests all + * multipliers between 1 and 256 (as RSE did now) one detects that even + * numbers are not usable at all. The remaining 128 odd numbers + * (except for the number 1) work more or less all equally well. They + * all distribute in an acceptable way and this way fill a hash table + * with an average percent of approx. 86%. + * + * If one compares the Chi^2 values of the variants, the number 33 does + * not even have the best value. 
/*
 * But the number 33 and a few other equally good numbers like 17, 31, 63,
 * 127 and 129 nevertheless have a great advantage over the remaining
 * numbers in the large set of possible multipliers: their multiply
 * operation can be replaced by a faster operation based on just one shift
 * plus either a single addition or subtraction operation. And because a
 * hash function has to both distribute well _and_ be very fast to compute,
 * those few numbers should be preferred, and this seems to be the reason
 * why Daniel J. Bernstein also preferred it.
 *
 *                  -- Ralf S. Engelschall
 */

/*
 * zend_inline_hash_func - DJBX33A ("times 33 with addition") string hash.
 *
 * arKey:      pointer to the key bytes; exactly nKeyLength bytes are read,
 *             so the key does not need to be NUL-terminated.
 * nKeyLength: number of bytes to hash; 0 is valid and yields the seed.
 *
 * Returns the hash, starting from the seed 5381 and applying
 *     hash = hash * 33 + byte
 * once per key byte, with the multiply written as (hash << 5) + hash.
 *
 * Notes:
 *  - The version removed by this patch combined each byte with XOR
 *    (h ^= byte); this one adds it, so the two produce different values.
 *  - Each byte is read through a plain `char`, so bytes >= 0x80 are widened
 *    according to the platform's char signedness.
 *  - `ulong` is assumed to be an unsigned type (its typedef is not visible
 *    here), in which case overflow wraps and is well defined.
 */
static inline ulong zend_inline_hash_func(char *arKey, uint nKeyLength)
{
	ulong hash = 5381;

	/* Main loop, unrolled eight times: consume eight key bytes per pass. */
	for (; nKeyLength >= 8; nKeyLength -= 8) {
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
		hash = ((hash << 5) + hash) + *arKey++;
	}

	/*
	 * Tail: 0..7 bytes are left. Enter at the case matching the remaining
	 * count and fall through so that exactly that many bytes are mixed in.
	 */
	switch (nKeyLength) {
		case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough */
		case 1: hash = ((hash << 5) + hash) + *arKey++; break;
		default: /* case 0: nothing left to hash */ break;
	}

	return hash;
}

/*
 * Trailing diff context (unchanged by this patch), kept for reference:
 *     ZEND_API ulong zend_hash_func(char *arKey, uint nKeyLength);
 *     #if ZEND_DEBUG
 */