]> granicus.if.org Git - python/commitdiff
This patch addresses two main issues: (1) There exist some non-fatal
authorFred Drake <fdrake@acm.org>
Thu, 29 Jun 2000 19:17:04 +0000 (19:17 +0000)
committerFred Drake <fdrake@acm.org>
Thu, 29 Jun 2000 19:17:04 +0000 (19:17 +0000)
errors in some of the hash algorithms. For exmaple, in float_hash and
complex_hash a certain part of the value is not included in the hash
calculation. See Tim's, Guido's, and my discussion of this on
python-dev in May under the title "fix float_hash and complex_hash for
64-bit *nix"

(2) The hash algorithms that use pointers (e.g. func_hash, code_hash)
are universally not correct on Win64 (they assume that sizeof(long) ==
sizeof(void*))

As well, this patch significantly cleans up the hash code. It adds the
two function _Py_HashDouble and _PyHash_VoidPtr that the various
hashing routine are changed to use.

These help maintain the hash function invariant: (a==b) =>
(hash(a)==hash(b))) I have added Lib/test/test_hash.py and
Lib/test/output/test_hash to test this for some cases.

Include/object.h
Lib/test/output/test_hash [new file with mode: 0644]
Lib/test/test_hash.py [new file with mode: 0644]
Objects/classobject.c
Objects/complexobject.c
Objects/floatobject.c
Objects/funcobject.c
Objects/methodobject.c
Objects/object.c
PC/_winreg.c

index 2250b7fb54064eeaff9cc4963bc3872984fd304d..20757bee0171be4a36c32e9a358b7fa6e3f8d3d1 100644 (file)
@@ -293,6 +293,10 @@ extern DL_IMPORT(void) Py_ReprLeave Py_PROTO((PyObject *));
 /* tstate dict key for PyObject_Compare helper */
 extern PyObject *_PyCompareState_Key;
 
+/* Helpers for hash functions */
+extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
+extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
+
 /* Flag bits for printing: */
 #define Py_PRINT_RAW   1       /* No string quotes etc. */
 
diff --git a/Lib/test/output/test_hash b/Lib/test/output/test_hash
new file mode 100644 (file)
index 0000000..0141a1a
--- /dev/null
@@ -0,0 +1 @@
+test_hash
diff --git a/Lib/test/test_hash.py b/Lib/test/test_hash.py
new file mode 100644 (file)
index 0000000..51b4c33
--- /dev/null
@@ -0,0 +1,26 @@
+# test the invariant that
+#   iff a==b then hash(a)==hash(b)
+#
+
+import test_support
+
+
+def same_hash(*objlist):
+       # hash each object given an raise TestFailed if
+       # the hash values are not all the same
+       hashed = map(hash, objlist)
+       for h in hashed[1:]:
+               if h != hashed[0]:
+                       raise TestFailed, "hashed values differ: %s" % `objlist`
+
+
+
+same_hash(1, 1L, 1.0, 1.0+0.0j)
+same_hash(int(1), long(1), float(1), complex(1))
+
+same_hash(long(1.23e300), float(1.23e300))
+
+same_hash(float(0.5), complex(0.5, 0.0))
+
+
+
index cd0bb1d9d83fb2d8d54be44666319f7c86ab75a5..f1dabfef5bf28587065ce85a83e0c36b0ed9b060 100644 (file)
@@ -864,10 +864,7 @@ instance_hash(inst)
                func = instance_getattr(inst, cmpstr);
                if (func == NULL) {
                        PyErr_Clear();
-                       outcome = (long)inst;
-                       if (outcome == -1)
-                               outcome = -2;
-                       return outcome;
+                       return _Py_HashPointer(inst);
                }
                PyErr_SetString(PyExc_TypeError, "unhashable instance");
                return -1;
index 42709ee09a30df298eea05e2201f22c61e5452bf..1a15a27ebd49f2943629f0dcc63a23758eccc007 100644 (file)
@@ -285,8 +285,7 @@ complex_hash(v)
        PyComplexObject *v;
 {
        double intpart, fractpart;
-       int expo;
-       long hipart, x;
+       long x;
        /* This is designed so that Python numbers with the same
           value hash to the same value, otherwise comparisons
           of mapping keys will turn out weird */
@@ -302,7 +301,7 @@ complex_hash(v)
 #endif
 
        if (fractpart == 0.0 && v->cval.imag == 0.0) {
-               if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
+               if (intpart > LONG_MAX || -intpart > LONG_MAX) {
                        /* Convert to long int and use its hash... */
                        PyObject *w = PyLong_FromDouble(v->cval.real);
                        if (w == NULL)
@@ -314,36 +313,18 @@ complex_hash(v)
                x = (long)intpart;
        }
        else {
-               fractpart = frexp(fractpart, &expo);
-               fractpart = fractpart * 2147483648.0; /* 2**31 */
-               hipart = (long)fractpart; /* Take the top 32 bits */
-               fractpart = (fractpart - (double)hipart) * 2147483648.0;
-                                               /* Get the next 32 bits */
-               x = hipart + (long)fractpart + (long)intpart + (expo << 15);
-                                               /* Combine everything */
+               x = _Py_HashDouble(v->cval.real);
+               if (x == -1)
+                       return -1;
 
                if (v->cval.imag != 0.0) { /* Hash the imaginary part */
                        /* XXX Note that this hashes complex(x, y)
                           to the same value as complex(y, x).
                           Still better than it used to be :-) */
-#ifdef MPW
-                       {
-                               extended e;
-                               fractpart = modf(v->cval.imag, &e);
-                               intpart = e;
-                       }
-#else
-                       fractpart = modf(v->cval.imag, &intpart);
-#endif
-                       fractpart = frexp(fractpart, &expo);
-                       fractpart = fractpart * 2147483648.0; /* 2**31 */
-                       hipart = (long)fractpart; /* Take the top 32 bits */
-                       fractpart =
-                               (fractpart - (double)hipart) * 2147483648.0;
-                                               /* Get the next 32 bits */
-                       x ^= hipart + (long)fractpart +
-                               (long)intpart + (expo << 15);
-                                               /* Combine everything */
+                       long y = _Py_HashDouble(v->cval.imag);
+                       if (y == -1)
+                               return -1;
+                       x += y;
                }
        }
        if (x == -1)
index 69b66b7470df9dc0ebf44015fcf5fd66aee1cfa0..29ade282bd5a3a7dc581fcc109fac7c74353fb41 100644 (file)
@@ -59,7 +59,13 @@ PERFORMANCE OF THIS SOFTWARE.
 #endif
 
 #ifndef LONG_MAX
+#if SIZEOF_LONG == 4
 #define LONG_MAX 0X7FFFFFFFL
+#elif SIZEOF_LONG == 8
+#define LONG_MAX 0X7FFFFFFFFFFFFFFFL
+#else
+#error "could not set LONG_MAX"
+#endif
 #endif
 
 #ifndef LONG_MIN
@@ -357,12 +363,12 @@ float_compare(v, w)
        return (i < j) ? -1 : (i > j) ? 1 : 0;
 }
 
+
 static long
 float_hash(v)
        PyFloatObject *v;
 {
        double intpart, fractpart;
-       int expo;
        long x;
        /* This is designed so that Python numbers with the same
           value hash to the same value, otherwise comparisons
@@ -379,7 +385,7 @@ float_hash(v)
 #endif
 
        if (fractpart == 0.0) {
-               if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) {
+               if (intpart > LONG_MAX || -intpart > LONG_MAX) {
                        /* Convert to long int and use its hash... */
                        PyObject *w = PyLong_FromDouble(v->ob_fval);
                        if (w == NULL)
@@ -393,14 +399,9 @@ float_hash(v)
        else {
                /* Note -- if you change this code, also change the copy
                   in complexobject.c */
-               long hipart;
-               fractpart = frexp(fractpart, &expo);
-               fractpart = fractpart * 2147483648.0; /* 2**31 */
-               hipart = (long)fractpart; /* Take the top 32 bits */
-               fractpart = (fractpart - (double)hipart) * 2147483648.0;
-                                               /* Get the next 32 bits */
-               x = hipart + (long)fractpart + (long)intpart + (expo << 15);
-                                               /* Combine everything */
+               x = _Py_HashDouble(v->ob_fval);
+               if (x == -1)
+                       return -1;
        }
        if (x == -1)
                x = -2;
index b976eab28f5979d628d30d92a5c0d4f09a242464..d9ce0271dc0b1dd07ea02a5565e7637ca7f825d5 100644 (file)
@@ -231,10 +231,12 @@ static long
 func_hash(f)
        PyFunctionObject *f;
 {
-       long h;
+       long h,x;
        h = PyObject_Hash(f->func_code);
        if (h == -1) return h;
-       h = h ^ (long)f->func_globals;
+       x = _Py_HashPointer(f->func_globals);
+       if (x == -1) return x;
+       h ^= x;
        if (h == -1) h = -2;
        return h;
 }
index 8b67a87e79abf376a9ecd86a8e5a4781f72c3bda..580bb2f25ba789f0448ff45debc02f750c1dcd34 100644 (file)
@@ -172,7 +172,7 @@ static long
 meth_hash(a)
        PyCFunctionObject *a;
 {
-       long x;
+       long x,y;
        if (a->m_self == NULL)
                x = 0;
        else {
@@ -180,7 +180,13 @@ meth_hash(a)
                if (x == -1)
                        return -1;
        }
-       return x ^ (long) a->m_ml->ml_meth;
+       y = _Py_HashPointer(a->m_ml->ml_meth);
+       if (y == -1)
+               return -1;
+       x ^= y;
+       if (x == -1)
+               x = -2;
+       return x;
 }
 
 PyTypeObject PyCFunction_Type = {
index 9a0e87c6ef61992177877d613e064ddb3569a9f1..9ed03b2fbe9b37a3384ab7687beae2914fb3dadb 100644 (file)
@@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE.
 
 #include "Python.h"
 
+#include "mymath.h"
+
 /* just for trashcan: */
 #include "compile.h"
 #include "frameobject.h"
@@ -507,6 +509,62 @@ PyObject_Compare(v, w)
        return result;
 }
 
+
+/* Set of hash utility functions to help maintaining the invariant that
+       iff a==b then hash(a)==hash(b)
+
+   All the utility functions (_Py_Hash*()) return "-1" to signify an error.
+*/
+
+long
+_Py_HashDouble(v)
+    double v;
+{
+       /* Use frexp to get at the bits in the double.
+        * Since the VAX D double format has 56 mantissa bits, which is the
+        * most of any double format in use, each of these parts may have as
+        * many as (but no more than) 56 significant bits.
+        * So, assuming sizeof(long) >= 4, each part can be broken into two longs;
+        * frexp and multiplication are used to do that.
+        * Also, since the Cray double format has 15 exponent bits, which is the
+        * most of any double format in use, shifting the exponent field left by
+        * 15 won't overflow a long (again assuming sizeof(long) >= 4).
+        */
+    int expo;
+    long hipart;
+
+    v = frexp(v, &expo);
+    v = v * 2147483648.0; /* 2**31 */
+    hipart = (long)v; /* Take the top 32 bits */
+       v = (v - (double)hipart) * 2147483648.0; /* Get the next 32 bits */
+
+    return hipart + (long)v + (expo << 15); /* Combine everything */
+}
+
+long
+_Py_HashPointer(p)
+       void *p;
+{
+#if SIZEOF_LONG >= SIZEOF_VOID_P
+       return (long)p;
+#else
+       /* convert to a Python long and hash that */
+       PyObject* longobj;
+       long x;
+       
+       if ((longobj = PyLong_FromVoidPtr(p)) == NULL) {
+               x = -1;
+               goto finally;
+       }
+       x = PyObject_Hash(longobj);
+       
+finally:
+       Py_XDECREF(longobj);
+       return x;
+#endif
+}
+
+
 long
 PyObject_Hash(v)
        PyObject *v;
@@ -514,8 +572,9 @@ PyObject_Hash(v)
        PyTypeObject *tp = v->ob_type;
        if (tp->tp_hash != NULL)
                return (*tp->tp_hash)(v);
-       if (tp->tp_compare == NULL)
-               return (long) v; /* Use address as hash value */
+       if (tp->tp_compare == NULL) {
+               return _Py_HashPointer(v); /* Use address as hash value */
+       }
        /* If there's a cmp but no hash defined, the object can't be hashed */
        PyErr_SetString(PyExc_TypeError, "unhashable type");
        return -1;
index f92476eb626c3be6c5a222d9a5821498d186057b..45399590e6ff27f90b3edbed44f0c530ee73def7 100644 (file)
@@ -423,7 +423,7 @@ PyHKEY_hashFunc(PyObject *ob)
        /* Just use the address.
           XXX - should we use the handle value?
        */
-       return (long)ob;
+       return _Py_HashPointer(ob);
 }