SF bug #513866: Float/long comparison anomaly.

author Tim Peters <tim.peters@gmail.com>

Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)

committer Tim Peters <tim.peters@gmail.com>

Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)
author Tim Peters <tim.peters@gmail.com>
Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)
committer Tim Peters <tim.peters@gmail.com>
Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py

index 1a04ce92d065a1204c88c6f3cf71bdc1656437a3..74ae7c65df3e1c1b1b453c42229112d1c90836fa 100644 (file)
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -387,8 +387,7 @@ def test_float_overflow():
                   "1. ** huge", "huge ** 1.", "1. ** mhuge", "mhuge ** 1.",
                   "math.sin(huge)", "math.sin(mhuge)",
                   "math.sqrt(huge)", "math.sqrt(mhuge)", # should do better
-                 "math.floor(huge)", "math.floor(mhuge)",
-                 "float(shuge) == int(shuge)"]:
+                 "math.floor(huge)", "math.floor(mhuge)"]:
  
          try:
              eval(test, namespace)
@@ -397,6 +396,11 @@ def test_float_overflow():
          else:
              raise TestFailed("expected OverflowError from %s" % test)
  
+        # XXX Perhaps float(shuge) can raise OverflowError on some box?
+        # The comparison should not.
+        if float(shuge) == int(shuge):
+            raise TestFailed("float(shuge) should not equal int(shuge)")
+
  # ---------------------------------------------- test huge log and log10
  
  def test_logs():
@@ -431,6 +435,101 @@ def test_logs():
          except ValueError:
              pass
  
+# ----------------------------------------------- test mixed comparisons
+
+def test_mixed_compares():
+    import math
+    import sys
+
+    if verbose:
+        print "mixed comparisons"
+
+    # We're mostly concerned with that mixing floats and longs does the
+    # right stuff, even when longs are too large to fit in a float.
+    # The safest way to check the results is to use an entirely different
+    # method, which we do here via a skeletal rational class (which
+    # represents all Python ints, longs and floats exactly).
+    class Rat:
+        def __init__(self, value):
+            if isinstance(value, (int, long)):
+                self.n = value
+                self.d = 1
+
+            elif isinstance(value, float):
+                # Convert to exact rational equivalent.
+                f, e = math.frexp(abs(value))
+                assert f == 0 or 0.5 <= f < 1.0
+                # |value| = f * 2**e exactly
+
+                # Suck up CHUNK bits at a time; 28 is enough so that we suck
+                # up all bits in 2 iterations for all known binary double-
+                # precision formats, and small enough to fit in an int.
+                CHUNK = 28
+                top = 0
+                # invariant: |value| = (top + f) * 2**e exactly
+                while f:
+                    f = math.ldexp(f, CHUNK)
+                    digit = int(f)
+                    assert digit >> CHUNK == 0
+                    top = (top << CHUNK) | digit
+                    f -= digit
+                    assert 0.0 <= f < 1.0
+                    e -= CHUNK
+
+                # Now |value| = top * 2**e exactly.
+                if e >= 0:
+                    n = top << e
+                    d = 1
+                else:
+                    n = top
+                    d = 1 << -e
+                if value < 0:
+                    n = -n
+                self.n = n
+                self.d = d
+                assert float(n) / float(d) == value
+
+            else:
+                raise TypeError("can't deal with %r" % val)
+
+        def __cmp__(self, other):
+            if not isinstance(other, Rat):
+                other = Rat(other)
+            return cmp(self.n * other.d, self.d * other.n)
+
+    cases = [0, 0.001, 0.99, 1.0, 1.5, 1e20, 1e200]
+    # 2**48 is an important boundary in the internals.  2**53 is an
+    # important boundary for IEEE double precision.
+    for t in 2.0**48, 2.0**50, 2.0**53:
+        cases.extend([t - 1.0, t - 0.3, t, t + 0.3, t + 1.0,
+                      long(t-1), long(t), long(t+1)])
+    cases.extend([0, 1, 2, sys.maxint, float(sys.maxint)])
+    # 1L<<20000 should exceed all double formats.  long(1e200) is to
+    # check that we get equality with 1e200 above.
+    t = long(1e200)
+    cases.extend([0L, 1L, 2L, 1L << 20000, t-1, t, t+1])
+    cases.extend([-x for x in cases])
+    for x in cases:
+        Rx = Rat(x)
+        for y in cases:
+            Ry = Rat(y)
+            Rcmp = cmp(Rx, Ry)
+            xycmp = cmp(x, y)
+            if Rcmp != xycmp:
+                raise TestFailed('%r %r %d %d' % (x, y, Rcmp, xycmp))
+            if (x == y) != (Rcmp == 0):
+                raise TestFailed('%r == %r %d' % (x, y, Rcmp))
+            if (x != y) != (Rcmp != 0):
+                raise TestFailed('%r != %r %d' % (x, y, Rcmp))
+            if (x < y) != (Rcmp < 0):
+                raise TestFailed('%r < %r %d' % (x, y, Rcmp))
+            if (x <= y) != (Rcmp <= 0):
+                raise TestFailed('%r <= %r %d' % (x, y, Rcmp))
+            if (x > y) != (Rcmp > 0):
+                raise TestFailed('%r > %r %d' % (x, y, Rcmp))
+            if (x >= y) != (Rcmp >= 0):
+                raise TestFailed('%r >= %r %d' % (x, y, Rcmp))
+
  # ---------------------------------------------------------------- do it
  
  test_division()
@@ -441,3 +540,4 @@ test_misc()
  test_auto_overflow()
  test_float_overflow()
  test_logs()
+test_mixed_compares()
diff --git a/Misc/NEWS b/Misc/NEWS

index 3c2734b2473aa9bbdbed65e4232ef17b2ba1f1a9..89a6c5712f8605e7352a4f243c0cf3e227fc4ca9 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -15,7 +15,14 @@ Core and builtins
  - The bytecode optimizer now folds tuples of constants into a single
    constant.
  
-- PyLong_AsUnsignedLong[Mask] now support int objects as well.
+- SF bug #513866:  Float/long comparison anomaly.  Prior to 2.4b1, when
+  an integer was compared to a float, the integer was coerced to a float.
+  That could yield spurious overflow errors (if the integer was very
+  large), and to anomalies such as
+  ``long(1e200)+1 == 1e200 == long(1e200)-1``.  Coercion to float is no
+  longer performed, and cases like ``long(1e200)-1 < 1e200``,
+  ``long(1e200)+1 > 1e200`` and ``(1 << 20000) > 1e200`` are computed
+  correctly now.
  
  Extension modules
  -----------------
@@ -72,6 +79,8 @@ Build
  C API
  -----
  
+- PyLong_AsUnsignedLong[Mask] now support int objects as well.
+
  - SF patch #998993: ``PyUnicode_DecodeUTF8Stateful`` and
    ``PyUnicode_DecodeUTF16Stateful`` have been added, which implement stateful
    decoding.
diff --git a/Objects/floatobject.c b/Objects/floatobject.c

index 774d2ead6a423bb8c05ac24d2363a2ab4c2fd83c..46c05b2ba7c545b926dddffb9b0fb40e2b80aa10 100644 (file)
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -354,38 +354,236 @@ float_str(PyFloatObject *v)
         return PyString_FromString(buf);
  }
  
+/* Comparison is pretty much a nightmare.  When comparing float to float,
+ * we do it as straightforwardly (and long-windedly) as conceivable, so
+ * that, e.g., Python x == y delivers the same result as the platform
+ * C x == y when x and/or y is a NaN.
+ * When mixing float with an integer type, there's no good *uniform* approach.
+ * Converting the double to an integer obviously doesn't work, since we
+ * may lose info from fractional bits.  Converting the integer to a double
+ * also has two failure modes:  (1) a long int may trigger overflow (too
+ * large to fit in the dynamic range of a C double); (2) even a C long may have
+ * more bits than fit in a C double (e.g., on a a 64-bit box long may have
+ * 63 bits of precision, but a C double probably has only 53), and then
+ * we can falsely claim equality when low-order integer bits are lost by
+ * coercion to double.  So this part is painful too.
+ */
+
  static PyObject*
  float_richcompare(PyObject *v, PyObject *w, int op)
  {
         double i, j;
         int r = 0;
  
-       CONVERT_TO_DOUBLE(v, i);
-       CONVERT_TO_DOUBLE(w, j);
+       assert(PyFloat_Check(v));
+       i = PyFloat_AS_DOUBLE(v);
+
+       /* Switch on the type of w.  Set i and j to doubles to be compared,
+        * and op to the richcomp to use.
+        */
+       if (PyFloat_Check(w))
+               j = PyFloat_AS_DOUBLE(w);
  
+       else if (Py_IS_INFINITY(i)) {
+               /* XXX If we had a reliable way to check whether i is a
+                * XXX NaN, it would belong in this branch too.
+                */
+               if (PyInt_Check(w) || PyLong_Check(w))
+                       /* The magnitude of i exceeds any finite integer,
+                        * so it doesn't matter which int we compare i with.
+                        */
+                       j = 0.0;
+               else
+                       goto Unimplemented;
+       }
+
+       else if (PyInt_Check(w)) {
+               long jj = PyInt_AS_LONG(w);
+               /* In the worst realistic case I can imagine, C double is a
+                * Cray single with 48 bits of precision, and long has 64
+                * bits.
+                */
+#if SIZEOF_LONG > 4
+               unsigned long abs = (unsigned long)(jj < 0 ? -jj : jj);
+               if (abs >> 48) {
+                       /* Needs more than 48 bits.  Make it take the
+                        * PyLong path.
+                        */
+                       PyObject *result;
+                       PyObject *ww = PyLong_FromLong(jj);
+
+                       if (ww == NULL)
+                               return NULL;
+                       result = float_richcompare(v, ww, op);
+                       Py_DECREF(ww);
+                       return result;
+               }
+#endif
+               j = (double)jj;
+               assert((long)j == jj);
+       }
+
+       else if (PyLong_Check(w)) {
+               int vsign = i == 0.0 ? 0 : i < 0.0 ? -1 : 1;
+               int wsign = _PyLong_Sign(w);
+               size_t nbits;
+               double mant;
+               int exponent;
+
+               if (vsign != wsign) {
+                       /* Magnitudes are irrelevant -- the signs alone
+                        * determine the outcome.
+                        */
+                       i = (double)vsign;
+                       j = (double)wsign;
+                       goto Compare;
+               }
+               /* The signs are the same. */
+               /* Convert w to a double if it fits.  In particular, 0 fits. */
+               nbits = _PyLong_NumBits(w);
+               if (nbits == (size_t)-1 && PyErr_Occurred()) {
+                       /* This long is so large that size_t isn't big enough
+                        * to hold the # of Python digits.  Replace with
+                        * little doubles that give the same outcome --
+                        * w is so large that its magnitude must exceed
+                        * the magnitude of any finite float.
+                        */
+                       PyErr_Clear();
+                       i = (double)vsign;
+                       assert(wsign != 0);
+                       j = wsign * 2.0;
+                       goto Compare;
+               }
+               if (nbits <= 48) {
+                       j = PyLong_AsDouble(w);
+                       /* It's impossible that <= 48 bits overflowed. */
+                       assert(j != -1.0 || ! PyErr_Occurred());
+                       goto Compare;
+               }
+               assert(wsign != 0); /* else nbits was 0 */
+               assert(vsign != 0); /* if vsign were 0, then since wsign is
+                                    * not 0, we would have taken the
+                                    * vsign != wsign branch at the start */
+               /* We want to work with non-negative numbers. */
+               if (vsign < 0) {
+                       /* "Multiply both sides" by -1; this also swaps the
+                        * comparator.
+                        */
+                       i = -i;
+                       op = _Py_SwappedOp[op];
+               }
+               assert(i > 0.0);
+               mant = frexp(i, &exponent);
+               /* exponent is the # of bits in v before the radix point;
+                * we know that nbits (the # of bits in w) > 48 at this point
+                */
+               if (exponent < 0 || (size_t)exponent < nbits) {
+                       i = 1.0;
+                       j = 2.0;
+                       goto Compare;
+               }
+               if ((size_t)exponent > nbits) {
+                       i = 2.0;
+                       j = 1.0;
+                       goto Compare;
+               }
+               /* v and w have the same number of bits before the radix
+                * point.  Construct two longs that have the same comparison
+                * outcome.
+                */
+               {
+                       double fracpart;
+                       double intpart;
+                       PyObject *result = NULL;
+                       PyObject *one = NULL;
+                       PyObject *vv = NULL;
+                       PyObject *ww = w;
+
+                       if (wsign < 0) {
+                               ww = PyNumber_Negative(w);
+                               if (ww == NULL)
+                                       goto Error;
+                       }
+                       else
+                               Py_INCREF(ww);
+
+                       fracpart = modf(i, &intpart);
+                       vv = PyLong_FromDouble(intpart);
+                       if (vv == NULL)
+                               goto Error;
+
+                       if (fracpart != 0.0) {
+                               /* Shift left, and or a 1 bit into vv
+                                * to represent the lost fraction.
+                                */
+                               PyObject *temp;
+
+                               one = PyInt_FromLong(1);
+                               if (one == NULL)
+                                       goto Error;
+
+                               temp = PyNumber_Lshift(ww, one);
+                               if (temp == NULL)
+                                       goto Error;
+                               Py_DECREF(ww);
+                               ww = temp;
+
+                               temp = PyNumber_Lshift(vv, one);
+                               if (temp == NULL)
+                                       goto Error;
+                               Py_DECREF(vv);
+                               vv = temp;
+
+                               temp = PyNumber_Or(vv, one);
+                               if (temp == NULL)
+                                       goto Error;
+                               Py_DECREF(vv);
+                               vv = temp;
+                       }
+
+                       r = PyObject_RichCompareBool(vv, ww, op);
+                       if (r < 0)
+                               goto Error;
+                       result = PyBool_FromLong(r);
+                Error:
+                       Py_XDECREF(vv);
+                       Py_XDECREF(ww);
+                       Py_XDECREF(one);
+                       return result;
+               }
+       } /* else if (PyLong_Check(w)) */
+
+       else    /* w isn't float, int, or long */
+               goto Unimplemented;
+
+ Compare:
         PyFPE_START_PROTECT("richcompare", return NULL)
         switch (op) {
         case Py_EQ:
-               r = i==j;
+               r = i == j;
                 break;
         case Py_NE:
-               r = i!=j;
+               r = i != j;
                 break;
         case Py_LE:
-               r = i<=j;
+               r = i <= j;
                 break;
         case Py_GE:
-               r = i>=j;
+               r = i >= j;
                 break;
         case Py_LT:
-               r = i<j;
+               r = i < j;
                 break;
         case Py_GT:
-               r = i>j;
+               r = i > j;
                 break;
         }
         PyFPE_END_PROTECT(r)
         return PyBool_FromLong(r);
+
+ Unimplemented:
+       Py_INCREF(Py_NotImplemented);
+       return Py_NotImplemented;
  }
  
  static long
author	Tim Peters <tim.peters@gmail.com>
	Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Thu, 23 Sep 2004 08:06:40 +0000 (08:06 +0000)
Lib/test/test_long.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/floatobject.c		patch \| blob \| history