From: Andi Gutmans <andi@php.net>
Date: Sat, 8 Jun 2002 12:44:39 +0000 (+0000)
Subject: - Add a loop unrolled version of the hash function and a bit of an
X-Git-Tag: php5_5_0~130
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1186d51c3a85041a3a680e1a750675ebeab4be07;p=php

- Add a loop unrolled version of the hash function and a bit of an
- explanation about our hash function (Ralf S. Engelschall)
---

diff --git a/Zend/zend_hash.h b/Zend/zend_hash.h
index c28b38d46b..479d69a649 100644
--- a/Zend/zend_hash.h
+++ b/Zend/zend_hash.h
@@ -188,18 +188,68 @@ ZEND_API int zend_hash_num_elements(HashTable *ht);
 
 ZEND_API int zend_hash_rehash(HashTable *ht);
 
+/*
+ * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
+ *
+ * This is Daniel J. Bernstein's popular `times 33' hash function as
+ * posted by him years ago on comp.lang.c. It basically uses a function
+ * like ``hash(i) = hash(i-1) * 33 + str[i]''. This is one of the best
+ * known hash functions for strings, because it is both very fast to
+ * compute and distributes very well.
+ *
+ * The magic of number 33, i.e. why it works better than many other
+ * constants, prime or not, has never been adequately explained by
+ * anyone. So I try an explanation: if one experimentally tests all
+ * multipliers between 1 and 256 (as RSE did now), one finds that even
+ * numbers are not usable at all. The remaining 128 odd numbers
+ * (except for the number 1) all work more or less equally well. They
+ * all distribute in an acceptable way and thereby fill a hash table
+ * to approx. 86% on average.
+ *
+ * If one compares the Chi^2 values of the variants, the number 33 does
+ * not even have the best value. But the number 33 and a few other
+ * equally good numbers like 17, 31, 63, 127 and 129 nevertheless have a
+ * great advantage over the remaining numbers in the large set of possible
+ * multipliers: their multiply operation can be replaced by a faster
+ * operation based on just one shift plus either a single addition
+ * or subtraction operation. And because a hash function has to both
+ * distribute well _and_ be very fast to compute, those few numbers
+ * should be preferred, and this seems to be the reason why Daniel J.
+ * Bernstein also preferred it.
+ *
+ *
+ *                  -- Ralf S. Engelschall <rse@engelschall.com>
+ */
+
 static inline ulong zend_inline_hash_func(char *arKey, uint nKeyLength)
 {
-	ulong h = 5381;
-	char *arEnd = arKey + nKeyLength;
-
-	while (arKey < arEnd) {
-		h += (h << 5);
-		h ^= (ulong) *arKey++;
+	register ulong hash = 5381;
+
+	/* hash = hash * 33 + byte, unrolled eight times; the switch below handles the 0..7 leftover bytes */
+	for (; nKeyLength >= 8; nKeyLength -= 8) {
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+		hash = ((hash << 5) + hash) + *arKey++;
+	}
+	switch (nKeyLength) {
+		case 7: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 6: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 5: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 4: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 3: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 2: hash = ((hash << 5) + hash) + *arKey++; /* fallthrough... */
+		case 1: hash = ((hash << 5) + hash) + *arKey++; break;
+		default: /* case 0: key length was a multiple of eight, nothing left to hash */ break;
 	}
-	return h;
+	return hash;
 }
 
+
 ZEND_API ulong zend_hash_func(char *arKey, uint nKeyLength);
 
 #if ZEND_DEBUG