Issue #3166: Make long -> float (and int -> float) conversions

author Mark Dickinson <dickinsm@gmail.com>

Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)
diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py

index ce18ad2bdeab2059dd79b0214113a3caeee81d7f..514a98e74caea0ff351f9ec9f9d2eadf1ecde43e 100644 (file)
--- a/Lib/test/test_int.py
+++ b/Lib/test/test_int.py
@@ -275,6 +275,40 @@ class IntTestCases(unittest.TestCase):
              self.assertEqual((a+1).bit_length(), i+1)
              self.assertEqual((-a-1).bit_length(), i+1)
  
+    @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"),
+                         "test requires IEEE 754 doubles")
+    def test_float_conversion(self):
+        # values exactly representable as floats
+        exact_values = [-2, -1, 0, 1, 2, 2**52, 2**53-1, 2**53, 2**53+2,
+                         2**53+4, 2**54-4, 2**54-2, 2**63, -2**63, 2**64,
+                         -2**64, 10**20, 10**21, 10**22]
+        for value in exact_values:
+            self.assertEqual(int(float(int(value))), value)
+
+        # test round-half-to-even
+        self.assertEqual(int(float(2**53+1)), 2**53)
+        self.assertEqual(int(float(2**53+2)), 2**53+2)
+        self.assertEqual(int(float(2**53+3)), 2**53+4)
+        self.assertEqual(int(float(2**53+5)), 2**53+4)
+        self.assertEqual(int(float(2**53+6)), 2**53+6)
+        self.assertEqual(int(float(2**53+7)), 2**53+8)
+
+        self.assertEqual(int(float(-2**53-1)), -2**53)
+        self.assertEqual(int(float(-2**53-2)), -2**53-2)
+        self.assertEqual(int(float(-2**53-3)), -2**53-4)
+        self.assertEqual(int(float(-2**53-5)), -2**53-4)
+        self.assertEqual(int(float(-2**53-6)), -2**53-6)
+        self.assertEqual(int(float(-2**53-7)), -2**53-8)
+
+        self.assertEqual(int(float(2**54-2)), 2**54-2)
+        self.assertEqual(int(float(2**54-1)), 2**54)
+        self.assertEqual(int(float(2**54+2)), 2**54)
+        self.assertEqual(int(float(2**54+3)), 2**54+4)
+        self.assertEqual(int(float(2**54+5)), 2**54+4)
+        self.assertEqual(int(float(2**54+6)), 2**54+8)
+        self.assertEqual(int(float(2**54+10)), 2**54+8)
+        self.assertEqual(int(float(2**54+11)), 2**54+12)
+
      def test_intconversion(self):
          # Test __int__()
          class ClassicMissingMethods:
diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py

index 1a0e33dd57a32214e705f98186ca3d6d4d0d5fd7..b41d270c18ca4b69b9c132cea2bae6c1320700ee 100644 (file)
--- a/Lib/test/test_long.py
+++ b/Lib/test/test_long.py
@@ -645,6 +645,65 @@ class LongTest(unittest.TestCase):
                              else:
                                  self.assertRaises(TypeError, pow,longx, longy, long(z))
  
+    @unittest.skipUnless(float.__getformat__("double").startswith("IEEE"),
+                         "test requires IEEE 754 doubles")
+    def test_float_conversion(self):
+        import sys
+        DBL_MAX = sys.float_info.max
+        DBL_MAX_EXP = sys.float_info.max_exp
+        DBL_MANT_DIG = sys.float_info.mant_dig
+
+        exact_values = [0L, 1L, 2L,
+                         long(2**53-3),
+                         long(2**53-2),
+                         long(2**53-1),
+                         long(2**53),
+                         long(2**53+2),
+                         long(2**54-4),
+                         long(2**54-2),
+                         long(2**54),
+                         long(2**54+4)]
+        for x in exact_values:
+            self.assertEqual(long(float(x)), x)
+            self.assertEqual(long(float(-x)), -x)
+
+        # test round-half-even
+        for x, y in [(1, 0), (2, 2), (3, 4), (4, 4), (5, 4), (6, 6), (7, 8)]:
+            for p in xrange(15):
+                self.assertEqual(long(float(2L**p*(2**53+x))), 2L**p*(2**53+y))
+
+        for x, y in [(0, 0), (1, 0), (2, 0), (3, 4), (4, 4), (5, 4), (6, 8),
+                     (7, 8), (8, 8), (9, 8), (10, 8), (11, 12), (12, 12),
+                     (13, 12), (14, 16), (15, 16)]:
+            for p in xrange(15):
+                self.assertEqual(long(float(2L**p*(2**54+x))), 2L**p*(2**54+y))
+
+        # behaviour near extremes of floating-point range
+        long_dbl_max = long(DBL_MAX)
+        top_power = 2**DBL_MAX_EXP
+        halfway = (long_dbl_max + top_power)/2
+        self.assertEqual(float(long_dbl_max), DBL_MAX)
+        self.assertEqual(float(long_dbl_max+1), DBL_MAX)
+        self.assertEqual(float(halfway-1), DBL_MAX)
+        self.assertRaises(OverflowError, float, halfway)
+        self.assertEqual(float(1-halfway), -DBL_MAX)
+        self.assertRaises(OverflowError, float, -halfway)
+        self.assertRaises(OverflowError, float, top_power-1)
+        self.assertRaises(OverflowError, float, top_power)
+        self.assertRaises(OverflowError, float, top_power+1)
+        self.assertRaises(OverflowError, float, 2*top_power-1)
+        self.assertRaises(OverflowError, float, 2*top_power)
+        self.assertRaises(OverflowError, float, top_power*top_power)
+
+        for p in xrange(100):
+            x = long(2**p * (2**53 + 1) + 1)
+            y = long(2**p * (2**53+ 2))
+            self.assertEqual(long(float(x)), y)
+
+            x = long(2**p * (2**53 + 1))
+            y = long(2**p * 2**53)
+            self.assertEqual(long(float(x)), y)
+
      def test_float_overflow(self):
          import math
  
diff --git a/Misc/NEWS b/Misc/NEWS

index 9a5387d200efdf9f761bdc170c52c234000c5a3a..d4dc2dcc61e781ef4d9ae750c0248b6497d7c057 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 1
  Core and Builtins
  -----------------
  
+- Issue #3166: Make long -> float (and int -> float) conversions
+  correctly rounded.
+
  - Issue #5787: object.__getattribute__(some_type, "__bases__") segfaulted on
    some builtin types.
  
diff --git a/Objects/intobject.c b/Objects/intobject.c

index d4532f49a67bb38c7e31aadf266151599e4e37bc..1b64fe7c106795badc759b6f44604862145bf36e 100644 (file)
--- a/Objects/intobject.c
+++ b/Objects/intobject.c
@@ -3,6 +3,7 @@
  
  #include "Python.h"
  #include <ctype.h>
+#include <float.h>
  
  static PyObject *int_int(PyIntObject *v);
  
@@ -928,12 +929,78 @@ int_long(PyIntObject *v)
         return PyLong_FromLong((v -> ob_ival));
  }
  
+static const unsigned char BitLengthTable[32] = {
+       0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
+};
+
+static int
+bits_in_ulong(unsigned long d)
+{
+       int d_bits = 0;
+       while (d >= 32) {
+               d_bits += 6;
+               d >>= 6;
+       }
+       d_bits += (int)BitLengthTable[d];
+       return d_bits;
+}
+
+#if 8*SIZEOF_LONG-1 <= DBL_MANT_DIG
+/* Every Python int can be exactly represented as a float. */
+
  static PyObject *
  int_float(PyIntObject *v)
  {
         return PyFloat_FromDouble((double)(v -> ob_ival));
  }
  
+#else
+/* Here not all Python ints are exactly representable as floats, so we may
+   have to round.  We do this manually, since the C standards don't specify
+   whether converting an integer to a float rounds up or down */
+
+static PyObject *
+int_float(PyIntObject *v)
+{
+       unsigned long abs_ival, lsb;
+       int round_up;
+
+       if (v->ob_ival < 0)
+               abs_ival = 0U-(unsigned long)v->ob_ival;
+       else
+               abs_ival = (unsigned long)v->ob_ival;
+       if (abs_ival < (1L << DBL_MANT_DIG))
+               /* small integer;  no need to round */
+               return PyFloat_FromDouble((double)v->ob_ival);
+
+       /* Round abs_ival to MANT_DIG significant bits, using the
+          round-half-to-even rule.  abs_ival & lsb picks out the 'rounding'
+          bit: the first bit after the most significant MANT_DIG bits of
+          abs_ival.  We round up if this bit is set, provided that either:
+
+            (1) abs_ival isn't exactly halfway between two floats, in which
+            case at least one of the bits following the rounding bit must be
+            set; i.e., abs_ival & lsb-1 != 0, or:
+
+            (2) the resulting rounded value has least significant bit 0; or
+            in other words the bit above the rounding bit is set (this is the
+            'to-even' bit of round-half-to-even); i.e., abs_ival & 2*lsb != 0
+
+          The condition "(1) or (2)" equates to abs_ival & 3*lsb-1 != 0. */
+
+       lsb = 1L << (bits_in_ulong(abs_ival)-DBL_MANT_DIG-1);
+       round_up = (abs_ival & lsb) && (abs_ival & (3*lsb-1));
+       abs_ival &= -2*lsb;
+       if (round_up)
+               abs_ival += 2*lsb;
+       return PyFloat_FromDouble(v->ob_ival < 0 ?
+                                 -(double)abs_ival :
+                                 (double)abs_ival);
+}
+
+#endif
+
  static PyObject *
  int_oct(PyIntObject *v)
  {
@@ -1139,16 +1206,10 @@ int__format__(PyObject *self, PyObject *args)
         return NULL;
  }
  
-static const unsigned char BitLengthTable[32] = {
-       0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-       5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
-};
-
  static PyObject *
  int_bit_length(PyIntObject *v)
  {
         unsigned long n;
-       long r = 0;
  
         if (v->ob_ival < 0)
                 /* avoid undefined behaviour when v->ob_ival == -LONG_MAX-1 */
@@ -1156,12 +1217,7 @@ int_bit_length(PyIntObject *v)
         else
                 n = (unsigned long)v->ob_ival;
  
-       while (n >= 32) {
-               r += 6;
-               n >>= 6;
-       }
-       r += (long)(BitLengthTable[n]);
-       return PyInt_FromLong(r);
+       return PyInt_FromLong(bits_in_ulong(n));
  }
  
  PyDoc_STRVAR(int_bit_length_doc,
diff --git a/Objects/longobject.c b/Objects/longobject.c

index dd22ce03fbb63c3d46f294989b95a5a1b2d5ecce..4b6502de7f29dfe6f03123e483a4e7e3c1614b22 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -8,6 +8,7 @@
  #include "longintrepr.h"
  #include "structseq.h"
  
+#include <float.h>
  #include <ctype.h>
  #include <stddef.h>
  
@@ -38,6 +39,9 @@
                 if (PyErr_CheckSignals()) PyTryBlock \
         }
  
+/* forward declaration */
+static int bits_in_digit(digit d);
+
  /* Normalize (remove leading zeros from) a long int object.
     Doesn't attempt to free the storage--in most cases, due to the nature
     of the algorithms used, this could save at most be one word anyway. */
@@ -729,33 +733,166 @@ _PyLong_AsScaledDouble(PyObject *vv, int *exponent)
  #undef NBITS_WANTED
  }
  
-/* Get a C double from a long int object. */
+/* Get a C double from a long int object.  Rounds to the nearest double,
+   using the round-half-to-even rule in the case of a tie. */
  
  double
  PyLong_AsDouble(PyObject *vv)
  {
-       int e = -1;
+       PyLongObject *v = (PyLongObject *)vv;
+       Py_ssize_t rnd_digit, rnd_bit, m, n;
+       digit lsb, *d;
+       int round_up = 0;
         double x;
  
         if (vv == NULL || !PyLong_Check(vv)) {
                 PyErr_BadInternalCall();
-               return -1;
-       }
-       x = _PyLong_AsScaledDouble(vv, &e);
-       if (x == -1.0 && PyErr_Occurred())
                 return -1.0;
-       /* 'e' initialized to -1 to silence gcc-4.0.x, but it should be
-          set correctly after a successful _PyLong_AsScaledDouble() call */
-       assert(e >= 0);
-       if (e > INT_MAX / PyLong_SHIFT)
+       }
+
+       /* Notes on the method: for simplicity, assume v is positive and >=
+          2**DBL_MANT_DIG. (For negative v we just ignore the sign until the
+          end; for small v no rounding is necessary.)  Write n for the number
+          of bits in v, so that 2**(n-1) <= v < 2**n, and n > DBL_MANT_DIG.
+
+          Some terminology: the *rounding bit* of v is the 1st bit of v that
+          will be rounded away (bit n - DBL_MANT_DIG - 1); the *parity bit*
+          is the bit immediately above.  The round-half-to-even rule says
+          that we round up if the rounding bit is set, unless v is exactly
+          halfway between two floats and the parity bit is zero.
+
+          Write d[0] ... d[m] for the digits of v, least to most significant.
+          Let rnd_bit be the index of the rounding bit, and rnd_digit the
+          index of the PyLong digit containing the rounding bit.  Then the
+          bits of the digit d[rnd_digit] look something like:
+
+                     rounding bit
+                         |
+                         v
+             msb -> sssssrttttttttt <- lsb
+                        ^
+                        |
+                     parity bit
+
+          where 's' represents a 'significant bit' that will be included in
+          the mantissa of the result, 'r' is the rounding bit, and 't'
+          represents a 'trailing bit' following the rounding bit.  Note that
+          if the rounding bit is at the top of d[rnd_digit] then the parity
+          bit will be the lsb of d[rnd_digit+1].  If we set
+
+             lsb = 1 << (rnd_bit % PyLong_SHIFT)
+
+          then d[rnd_digit] & (PyLong_BASE - 2*lsb) selects just the
+          significant bits of d[rnd_digit], d[rnd_digit] & (lsb-1) gets the
+          trailing bits, and d[rnd_digit] & lsb gives the rounding bit.
+
+          We initialize the double x to the integer given by digits
+          d[rnd_digit:m-1], but with the rounding bit and trailing bits of
+          d[rnd_digit] masked out.  So the value of x comes from the top
+          DBL_MANT_DIG bits of v, multiplied by 2*lsb.  Note that in the loop
+          that produces x, all floating-point operations are exact (assuming
+          that FLT_RADIX==2).  Now if we're rounding down, the value we want
+          to return is simply
+
+             x * 2**(PyLong_SHIFT * rnd_digit).
+
+          and if we're rounding up, it's
+
+             (x + 2*lsb) * 2**(PyLong_SHIFT * rnd_digit).
+
+          Under the round-half-to-even rule, we round up if, and only
+          if, the rounding bit is set *and* at least one of the
+          following three conditions is satisfied:
+
+             (1) the parity bit is set, or
+             (2) at least one of the trailing bits of d[rnd_digit] is set, or
+             (3) at least one of the digits d[i], 0 <= i < rnd_digit
+                is nonzero.
+
+          Finally, we have to worry about overflow.  If v >= 2**DBL_MAX_EXP,
+          or equivalently n > DBL_MAX_EXP, then overflow occurs.  If v <
+          2**DBL_MAX_EXP then we're usually safe, but there's a corner case
+          to consider: if v is very close to 2**DBL_MAX_EXP then it's
+          possible that v is rounded up to exactly 2**DBL_MAX_EXP, and then
+          again overflow occurs.
+       */
+
+       if (Py_SIZE(v) == 0)
+               return 0.0;
+       m = ABS(Py_SIZE(v)) - 1;
+       d = v->ob_digit;
+       assert(d[m]);  /* v should be normalized */
+
+       /* fast path for case where 0 < abs(v) < 2**DBL_MANT_DIG */
+       if (m < DBL_MANT_DIG / PyLong_SHIFT ||
+           (m == DBL_MANT_DIG / PyLong_SHIFT &&
+            d[m] < (digit)1 << DBL_MANT_DIG%PyLong_SHIFT)) {
+               x = d[m];
+               while (--m >= 0)
+                       x = x*PyLong_BASE + d[m];
+               return Py_SIZE(v) < 0 ? -x : x;
+       }
+
+       /* if m is huge then overflow immediately; otherwise, compute the
+          number of bits n in v.  The condition below implies n (= #bits) >=
+          m * PyLong_SHIFT + 1 > DBL_MAX_EXP, hence v >= 2**DBL_MAX_EXP. */
+       if (m > (DBL_MAX_EXP-1)/PyLong_SHIFT)
                 goto overflow;
-       errno = 0;
-       x = ldexp(x, e * PyLong_SHIFT);
-       if (Py_OVERFLOWED(x))
+       n = m * PyLong_SHIFT + bits_in_digit(d[m]);
+       if (n > DBL_MAX_EXP)
                 goto overflow;
-       return x;
  
-overflow:
+       /* find location of rounding bit */
+       assert(n > DBL_MANT_DIG); /* dealt with |v| < 2**DBL_MANT_DIG above */
+       rnd_bit = n - DBL_MANT_DIG - 1;
+       rnd_digit = rnd_bit/PyLong_SHIFT;
+       lsb = (digit)1 << (rnd_bit%PyLong_SHIFT);
+
+       /* Get top DBL_MANT_DIG bits of v.  Assumes PyLong_SHIFT <
+          DBL_MANT_DIG, so we'll need bits from at least 2 digits of v. */
+       x = d[m];
+       assert(m > rnd_digit);
+       while (--m > rnd_digit)
+               x = x*PyLong_BASE + d[m];
+       x = x*PyLong_BASE + (d[m] & (PyLong_BASE-2*lsb));
+
+       /* decide whether to round up, using round-half-to-even */
+       assert(m == rnd_digit);
+       if (d[m] & lsb) { /* if (rounding bit is set) */
+               digit parity_bit;
+               if (lsb == PyLong_BASE/2)
+                       parity_bit = d[m+1] & 1;
+               else
+                       parity_bit = d[m] & 2*lsb;
+               if (parity_bit)
+                       round_up = 1;
+               else if (d[m] & (lsb-1))
+                       round_up = 1;
+               else {
+                       while (--m >= 0) {
+                               if (d[m]) {
+                                       round_up = 1;
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       /* and round up if necessary */
+       if (round_up) {
+               x += 2*lsb;
+               if (n == DBL_MAX_EXP &&
+                   x == ldexp((double)(2*lsb), DBL_MANT_DIG)) {
+                       /* overflow corner case */
+                       goto overflow;
+               }
+       }
+
+       /* shift, adjust for sign, and return */
+       x = ldexp(x, rnd_digit*PyLong_SHIFT);
+       return Py_SIZE(v) < 0 ? -x : x;
+
+  overflow:
         PyErr_SetString(PyExc_OverflowError,
                 "long int too large to convert to float");
         return -1.0;
author	Mark Dickinson <dickinsm@gmail.com>
	Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Mon, 20 Apr 2009 21:13:33 +0000 (21:13 +0000)
Lib/test/test_int.py		patch \| blob \| history
Lib/test/test_long.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/intobject.c		patch \| blob \| history
Objects/longobject.c		patch \| blob \| history