Issue #7117, continued: Change round implementation to use the correctly-rounded

author Mark Dickinson <dickinsm@gmail.com>

Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)
diff --git a/Include/floatobject.h b/Include/floatobject.h

index 6c11036190aa44fbcd0dbdc340283d4d0d49a828..54e88256a2e1cb83c6c3cd9756d02ef06fb91306 100644 (file)
--- a/Include/floatobject.h
+++ b/Include/floatobject.h
@@ -127,6 +127,13 @@ PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj,
                                                char *format_spec,
                                                Py_ssize_t format_spec_len);
  
+/* Round a C double x to the closest multiple of 10**-ndigits.  Returns a
+   Python float on success, or NULL (with an appropriate exception set) on
+   failure.  Used in builtin_round in bltinmodule.c. */
+PyAPI_FUNC(PyObject *) _Py_double_round(double x, int ndigits);
+
+
+
  #ifdef __cplusplus
  }
  #endif
diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py

index 86dae3f690a47e7156c4aec54696ab137097793d..573cc7e6e1d24864c561d3425e1b1dff0c2b7fa7 100644 (file)
--- a/Lib/test/test_float.py
+++ b/Lib/test/test_float.py
@@ -5,7 +5,9 @@ from test import test_support
  import math
  from math import isinf, isnan, copysign, ldexp
  import operator
-import random, fractions
+import random
+import fractions
+import sys
  
  INF = float("inf")
  NAN = float("nan")
@@ -339,6 +341,141 @@ class ReprTestCase(unittest.TestCase):
              self.assertEqual(v, eval(repr(v)))
          floats_file.close()
  
+@unittest.skipUnless(float.__getformat__("double").startswith("IEEE"),
+                     "test requires IEEE 754 doubles")
+class RoundTestCase(unittest.TestCase):
+    def test_second_argument_type(self):
+        # any type with an __index__ method should be permitted as
+        # a second argument
+        self.assertAlmostEqual(round(12.34, True), 12.3)
+
+        class MyIndex(object):
+            def __index__(self): return 4
+        self.assertAlmostEqual(round(-0.123456, MyIndex()), -0.1235)
+        # but floats should be illegal
+        self.assertRaises(TypeError, round, 3.14159, 2.0)
+
+    def test_inf_nan(self):
+        # rounding an infinity or nan returns the same number;
+        # (in py3k, rounding an infinity or nan raises an error,
+        #  since the result can't be represented as a long).
+        self.assertEqual(round(INF), INF)
+        self.assertEqual(round(-INF), -INF)
+        self.assertTrue(math.isnan(round(NAN)))
+        for n in range(-5, 5):
+            self.assertEqual(round(INF, n), INF)
+            self.assertEqual(round(-INF, n), -INF)
+            self.assertTrue(math.isnan(round(NAN, n)))
+
+    def test_large_n(self):
+        for n in [324, 325, 400, 2**31-1, 2**31, 2**32, 2**100]:
+            self.assertEqual(round(123.456, n), 123.456)
+            self.assertEqual(round(-123.456, n), -123.456)
+            self.assertEqual(round(1e300, n), 1e300)
+            self.assertEqual(round(1e-320, n), 1e-320)
+        self.assertEqual(round(1e150, 300), 1e150)
+        self.assertEqual(round(1e300, 307), 1e300)
+        self.assertEqual(round(-3.1415, 308), -3.1415)
+        self.assertEqual(round(1e150, 309), 1e150)
+        self.assertEqual(round(1.4e-315, 315), 1e-315)
+
+    def test_small_n(self):
+        for n in [-308, -309, -400, 1-2**31, -2**31, -2**31-1, -2**100]:
+            self.assertEqual(round(123.456, n), 0.0)
+            self.assertEqual(round(-123.456, n), -0.0)
+            self.assertEqual(round(1e300, n), 0.0)
+            self.assertEqual(round(1e-320, n), 0.0)
+
+    def test_overflow(self):
+        self.assertRaises(OverflowError, round, 1.6e308, -308)
+        self.assertRaises(OverflowError, round, -1.7e308, -308)
+
+    @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short',
+                         "test applies only when using short float repr style")
+    def test_previous_round_bugs(self):
+        # particular cases that have occurred in bug reports
+        self.assertEqual(round(562949953421312.5, 1),
+                          562949953421312.5)
+        self.assertEqual(round(56294995342131.5, 3),
+                         56294995342131.5)
+
+    @unittest.skipUnless(getattr(sys, 'float_repr_style', '') == 'short',
+                         "test applies only when using short float repr style")
+    def test_halfway_cases(self):
+        # Halfway cases need special attention, since the current
+        # implementation has to deal with them specially.  Note that
+        # 2.x rounds halfway values up (i.e., away from zero) while
+        # 3.x does round-half-to-even.
+        self.assertAlmostEqual(round(0.125, 2), 0.13)
+        self.assertAlmostEqual(round(0.375, 2), 0.38)
+        self.assertAlmostEqual(round(0.625, 2), 0.63)
+        self.assertAlmostEqual(round(0.875, 2), 0.88)
+        self.assertAlmostEqual(round(-0.125, 2), -0.13)
+        self.assertAlmostEqual(round(-0.375, 2), -0.38)
+        self.assertAlmostEqual(round(-0.625, 2), -0.63)
+        self.assertAlmostEqual(round(-0.875, 2), -0.88)
+
+        self.assertAlmostEqual(round(0.25, 1), 0.3)
+        self.assertAlmostEqual(round(0.75, 1), 0.8)
+        self.assertAlmostEqual(round(-0.25, 1), -0.3)
+        self.assertAlmostEqual(round(-0.75, 1), -0.8)
+
+        self.assertEqual(round(-6.5, 0), -7.0)
+        self.assertEqual(round(-5.5, 0), -6.0)
+        self.assertEqual(round(-1.5, 0), -2.0)
+        self.assertEqual(round(-0.5, 0), -1.0)
+        self.assertEqual(round(0.5, 0), 1.0)
+        self.assertEqual(round(1.5, 0), 2.0)
+        self.assertEqual(round(2.5, 0), 3.0)
+        self.assertEqual(round(3.5, 0), 4.0)
+        self.assertEqual(round(4.5, 0), 5.0)
+        self.assertEqual(round(5.5, 0), 6.0)
+        self.assertEqual(round(6.5, 0), 7.0)
+
+        # same but without an explicit second argument; in 3.x these
+        # will give integers
+        self.assertEqual(round(-6.5), -7.0)
+        self.assertEqual(round(-5.5), -6.0)
+        self.assertEqual(round(-1.5), -2.0)
+        self.assertEqual(round(-0.5), -1.0)
+        self.assertEqual(round(0.5), 1.0)
+        self.assertEqual(round(1.5), 2.0)
+        self.assertEqual(round(2.5), 3.0)
+        self.assertEqual(round(3.5), 4.0)
+        self.assertEqual(round(4.5), 5.0)
+        self.assertEqual(round(5.5), 6.0)
+        self.assertEqual(round(6.5), 7.0)
+
+        self.assertEqual(round(-25.0, -1), -30.0)
+        self.assertEqual(round(-15.0, -1), -20.0)
+        self.assertEqual(round(-5.0, -1), -10.0)
+        self.assertEqual(round(5.0, -1), 10.0)
+        self.assertEqual(round(15.0, -1), 20.0)
+        self.assertEqual(round(25.0, -1), 30.0)
+        self.assertEqual(round(35.0, -1), 40.0)
+        self.assertEqual(round(45.0, -1), 50.0)
+        self.assertEqual(round(55.0, -1), 60.0)
+        self.assertEqual(round(65.0, -1), 70.0)
+        self.assertEqual(round(75.0, -1), 80.0)
+        self.assertEqual(round(85.0, -1), 90.0)
+        self.assertEqual(round(95.0, -1), 100.0)
+        self.assertEqual(round(12325.0, -1), 12330.0)
+
+        self.assertEqual(round(350.0, -2), 400.0)
+        self.assertEqual(round(450.0, -2), 500.0)
+
+        self.assertAlmostEqual(round(0.5e21, -21), 1e21)
+        self.assertAlmostEqual(round(1.5e21, -21), 2e21)
+        self.assertAlmostEqual(round(2.5e21, -21), 3e21)
+        self.assertAlmostEqual(round(5.5e21, -21), 6e21)
+        self.assertAlmostEqual(round(8.5e21, -21), 9e21)
+
+        self.assertAlmostEqual(round(-1.5e22, -22), -2e22)
+        self.assertAlmostEqual(round(-0.5e22, -22), -1e22)
+        self.assertAlmostEqual(round(0.5e22, -22), 1e22)
+        self.assertAlmostEqual(round(1.5e22, -22), 2e22)
+
+
  # Beginning with Python 2.6 float has cross platform compatible
  # ways to create and represent inf and nan
  class InfNanTest(unittest.TestCase):
@@ -859,6 +996,7 @@ def test_main():
          UnknownFormatTestCase,
          IEEEFormatTestCase,
          ReprTestCase,
+        RoundTestCase,
          InfNanTest,
          HexFloatTestCase,
          )
diff --git a/Misc/NEWS b/Misc/NEWS

index 0e76ccb60dea7b1dea517b7dc48a37aa3b04beb1..a52e28aad1a6f18d695693b1328cdf446d9f527e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,15 @@ What's New in Python 2.7 alpha 1
  Core and Builtins
  -----------------
  
+- Issue #7117: Backport round implementation from Python 3.x.  round
+  now uses David Gay's correctly-rounded string <-> double conversions
+  (when available), and so produces correctly rounded results.  There
+  are two related small changes: (1) round now accepts any class with
+  an __index__ method for its second argument (but no longer accepts
+  floats for the second argument), and (2) an excessively large second
+  integer argument (e.g., round(1.234, 10**100)) no longer raises an
+  exception.
+
  - Issue #1757126: Fix the cyrillic-asian alias for the ptcp154 encoding.
  
  - Fix several issues with compile().  The input can now contain Windows and Mac
diff --git a/Objects/floatobject.c b/Objects/floatobject.c

index 28b2004bce2ee6c4867b06193e7c44705a86e407..461029a91e7893b483f6253bcbdc7a6f509b6a89 100644 (file)
--- a/Objects/floatobject.c
+++ b/Objects/floatobject.c
@@ -999,6 +999,202 @@ float_long(PyObject *v)
         return PyLong_FromDouble(x);
  }
  
+/* _Py_double_round: rounds a finite nonzero double to the closest multiple of
+   10**-ndigits; here ndigits is within reasonable bounds (typically, -308 <=
+   ndigits <= 323).  Returns a Python float, or sets a Python error and
+   returns NULL on failure (OverflowError and memory errors are possible). */
+
+#ifndef PY_NO_SHORT_FLOAT_REPR
+/* version of _Py_double_round that uses the correctly-rounded string<->double
+   conversions from Python/dtoa.c */
+
+/* FIVE_POW_LIMIT is the largest k such that 5**k is exactly representable as
+   a double.  Since we're using the code in Python/dtoa.c, it should be safe
+   to assume that C doubles are IEEE 754 binary64 format.  To be on the safe
+   side, we check this. */
+#if DBL_MANT_DIG == 53
+#define FIVE_POW_LIMIT 22
+#else
+#error "C doubles do not appear to be IEEE 754 binary64 format"
+#endif
+
+PyObject *
+_Py_double_round(double x, int ndigits) {
+
+       double rounded, m;
+       Py_ssize_t buflen, mybuflen=100;
+       char *buf, *buf_end, shortbuf[100], *mybuf=shortbuf;
+       int decpt, sign, val, halfway_case;
+       PyObject *result = NULL;
+
+       /* The basic idea is very simple: convert and round the double to a
+          decimal string using _Py_dg_dtoa, then convert that decimal string
+          back to a double with _Py_dg_strtod.  There's one minor difficulty:
+          Python 2.x expects round to do round-half-away-from-zero, while
+          _Py_dg_dtoa does round-half-to-even.  So we need some way to detect
+          and correct the halfway cases.
+
+          Detection: a halfway value has the form k * 0.5 * 10**-ndigits for
+          some odd integer k.  Or in other words, a rational number x is
+          exactly halfway between two multiples of 10**-ndigits if its
+          2-valuation is exactly -ndigits-1 and its 5-valuation is at least
+          -ndigits.  For ndigits >= 0 the latter condition is automatically
+          satisfied for a binary float x, since any such float has
+          nonnegative 5-valuation.  For 0 > ndigits >= -22, x needs to be an
+          integral multiple of 5**-ndigits; we can check this using fmod.
+          For -22 > ndigits, there are no halfway cases: 5**23 takes 54 bits
+          to represent exactly, so any odd multiple of 0.5 * 10**n for n >=
+          23 takes at least 54 bits of precision to represent exactly.
+
+          Correction: a simple strategy for dealing with halfway cases is to
+          (for the halfway cases only) call _Py_dg_dtoa with an argument of
+          ndigits+1 instead of ndigits (thus doing an exact conversion to
+          decimal), round the resulting string manually, and then convert
+          back using _Py_dg_strtod.
+       */
+
+       /* nans, infinities and zeros should have already been dealt
+          with by the caller (in this case, builtin_round) */
+       assert(Py_IS_FINITE(x) && x != 0.0);
+
+       /* find 2-valuation val of x */
+       m = frexp(x, &val);
+       while (m != floor(m)) {
+               m *= 2.0;
+               val--;
+       }
+
+       /* determine whether this is a halfway case */
+       if (val == -ndigits-1) {
+               if (ndigits >= 0)
+                       halfway_case = 1;
+               else if (ndigits >= -FIVE_POW_LIMIT) {
+                       double five_pow = 1.0;
+                       int i;
+                       for (i=0; i < -ndigits; i++)
+                               five_pow *= 5.0;
+                       halfway_case = fmod(x, five_pow) == 0.0;
+               }
+               else
+                       halfway_case = 0;
+       }
+       else
+               halfway_case = 0;
+
+       /* round to a decimal string; use an extra place for halfway case */
+       buf = _Py_dg_dtoa(x, 3, ndigits+halfway_case, &decpt, &sign, &buf_end);
+       if (buf == NULL) {
+               PyErr_NoMemory();
+               return NULL;
+       }
+       buflen = buf_end - buf;
+
+       /* in halfway case, do the round-half-away-from-zero manually */
+       if (halfway_case) {
+               int i, carry;
+               /* sanity check: _Py_dg_dtoa should not have stripped
+                  any zeros from the result: there should be exactly
+                  ndigits+1 places following the decimal point, and
+                  the last digit in the buffer should be a '5'.*/
+               assert(buflen - decpt == ndigits+1);
+               assert(buf[buflen-1] == '5');
+
+               /* increment and shift right at the same time. */
+               decpt += 1;
+               carry = 1;
+               for (i=buflen-1; i-- > 0;) {
+                       carry += buf[i] - '0';
+                       buf[i+1] = carry % 10 + '0';
+                       carry /= 10;
+               }
+               buf[0] = carry + '0';
+       }
+
+       /* Get new buffer if shortbuf is too small.  Space needed <= buf_end -
+          buf + 8: (1 extra for '0', 1 for sign, 5 for exp, 1 for '\0'). */
+       if (buflen + 8 > mybuflen) {
+               mybuflen = buflen+8;
+               mybuf = (char *)PyMem_Malloc(mybuflen);
+               if (mybuf == NULL) {
+                       PyErr_NoMemory();
+                       goto exit;
+               }
+       }
+       /* copy buf to mybuf, adding exponent, sign and leading 0 */
+       PyOS_snprintf(mybuf, mybuflen, "%s0%se%d", (sign ? "-" : ""),
+                     buf, decpt - (int)buflen);
+
+       /* and convert the resulting string back to a double */
+       errno = 0;
+       rounded = _Py_dg_strtod(mybuf, NULL);
+       if (errno == ERANGE && fabs(rounded) >= 1.)
+               PyErr_SetString(PyExc_OverflowError,
+                               "rounded value too large to represent");
+       else
+               result = PyFloat_FromDouble(rounded);
+
+       /* done computing value;  now clean up */
+       if (mybuf != shortbuf)
+               PyMem_Free(mybuf);
+  exit:
+       _Py_dg_freedtoa(buf);
+       return result;
+}
+
+#undef FIVE_POW_LIMIT
+
+#else /* PY_NO_SHORT_FLOAT_REPR */
+
+/* fallback version, to be used when correctly rounded binary<->decimal
+   conversions aren't available */
+
+PyObject *
+_Py_double_round(double x, int ndigits) {
+       double pow1, pow2, y, z;
+       if (ndigits >= 0) {
+               if (ndigits > 22) {
+                       /* pow1 and pow2 are each safe from overflow, but
+                          pow1*pow2 ~= pow(10.0, ndigits) might overflow */
+                       pow1 = pow(10.0, (double)(ndigits-22));
+                       pow2 = 1e22;
+               }
+               else {
+                       pow1 = pow(10.0, (double)ndigits);
+                       pow2 = 1.0;
+               }
+               y = (x*pow1)*pow2;
+               /* if y overflows, then rounded value is exactly x */
+               if (!Py_IS_FINITE(y))
+                       return PyFloat_FromDouble(x);
+       }
+       else {
+               pow1 = pow(10.0, (double)-ndigits);
+               pow2 = 1.0; /* unused; silences a gcc compiler warning */
+               y = x / pow1;
+       }
+
+       z = round(y);
+       if (fabs(y-z) == 0.5)
+               /* halfway between two integers; use round-away-from-zero */
+               z = y + copysign(0.5, y);
+
+       if (ndigits >= 0)
+               z = (z / pow2) / pow1;
+       else
+               z *= pow1;
+
+       /* if computation resulted in overflow, raise OverflowError */
+       if (!Py_IS_FINITE(z)) {
+               PyErr_SetString(PyExc_OverflowError,
+                               "overflow occurred during round");
+               return NULL;
+       }
+
+       return PyFloat_FromDouble(z);
+}
+
+#endif /* PY_NO_SHORT_FLOAT_REPR */
+
  static PyObject *
  float_float(PyObject *v)
  {
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c

index fcae58afcc4fe67dc172f8cd9742bbe4c78cc597..a8d8d97399c67ecc0c0f6e246af27a8af9950c46 100644 (file)
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -8,6 +8,7 @@
  #include "eval.h"
  
  #include <ctype.h>
+#include <float.h> /* for DBL_MANT_DIG and friends */
  
  #ifdef RISCOS
  #include "unixstuff.h"
@@ -2120,29 +2121,47 @@ For most object types, eval(repr(object)) == object.");
  static PyObject *
  builtin_round(PyObject *self, PyObject *args, PyObject *kwds)
  {
-       double number;
-       double f;
-       int ndigits = 0;
-       int i;
+       double x;
+       PyObject *o_ndigits = NULL;
+       Py_ssize_t ndigits;
         static char *kwlist[] = {"number", "ndigits", 0};
  
-       if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|i:round",
-               kwlist, &number, &ndigits))
+       if (!PyArg_ParseTupleAndKeywords(args, kwds, "d|O:round",
+               kwlist, &x, &o_ndigits))
                 return NULL;
-       f = 1.0;
-       i = abs(ndigits);
-       while  (--i >= 0)
-               f = f*10.0;
-       if (ndigits < 0)
-               number /= f;
-       else
-               number *= f;
-       number = round(number);
-       if (ndigits < 0)
-               number *= f;
+
+       /* nans, infinities and zeros round to themselves */
+       if (!Py_IS_FINITE(x) || x == 0.0)
+               return PyFloat_FromDouble(x);
+
+       if (o_ndigits == NULL) {
+               /* second argument defaults to 0 */
+               ndigits = 0;
+       }
+       else {
+               /* interpret 2nd argument as a Py_ssize_t; clip on overflow */
+               ndigits = PyNumber_AsSsize_t(o_ndigits, NULL);
+               if (ndigits == -1 && PyErr_Occurred())
+                       return NULL;
+       }
+
+       /* Deal with extreme values for ndigits. For ndigits > NDIGITS_MAX, x
+          always rounds to itself.  For ndigits < NDIGITS_MIN, x always
+          rounds to +-0.0.  Here 0.30103 is an upper bound for log10(2). */
+#define NDIGITS_MAX ((int)((DBL_MANT_DIG-DBL_MIN_EXP) * 0.30103))
+#define NDIGITS_MIN (-(int)((DBL_MAX_EXP + 1) * 0.30103))
+       if (ndigits > NDIGITS_MAX)
+               /* return x */
+               return PyFloat_FromDouble(x);
+       else if (ndigits < NDIGITS_MIN)
+               /* return 0.0, but with sign of x */
+               return PyFloat_FromDouble(0.0*x);
         else
-               number /= f;
-       return PyFloat_FromDouble(number);
+               /* finite x, and ndigits is not unreasonably large */
+               /* _Py_double_round is defined in floatobject.c */
+               return _Py_double_round(x, (int)ndigits);
+#undef NDIGITS_MAX
+#undef NDIGITS_MIN
  }
  
  PyDoc_STRVAR(round_doc,
author	Mark Dickinson <dickinsm@gmail.com>
	Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Wed, 18 Nov 2009 19:33:35 +0000 (19:33 +0000)
Include/floatobject.h		patch \| blob \| history
Lib/test/test_float.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Objects/floatobject.c		patch \| blob \| history
Python/bltinmodule.c		patch \| blob \| history