Issue #5859: Remove use of fixed-length buffers for float formatting

author Mark Dickinson <dickinsm@gmail.com>

Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py

index 2a58e582d4a29d38b21078f61859ff908db24238..1637efb51b2e11a581d896cb503698e0493d270b 100644 (file)
--- a/Lib/test/string_tests.py
+++ b/Lib/test/string_tests.py
@@ -1105,14 +1105,7 @@ class MixinStrUnicodeUserStringTest:
              value = 0.01
              for x in range(60):
                  value = value * 3.141592655 / 3.0 * 10.0
-                # The formatfloat() code in stringobject.c and
-                # unicodeobject.c uses a 120 byte buffer and switches from
-                # 'f' formatting to 'g' at precision 50, so we expect
-                # OverflowErrors for the ranges x < 50 and prec >= 67.
-                if x < 50 and prec >= 67:
-                    self.checkraises(OverflowError, format, "__mod__", value)
-                else:
-                    self.checkcall(format, "__mod__", value)
+                self.checkcall(format, "__mod__", value)
  
      def test_inplace_rewrites(self):
          # Check that strings don't copy and modify cached single-character strings
diff --git a/Misc/NEWS b/Misc/NEWS

index 257762b965e0eac74a6a1a5b0e14e24198935135..3e8f5e6478e3fd572962cb974faecd7e3df51c3b 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.1 beta 1?
  Core and Builtins
  -----------------
  
+- Issue #5859: Remove length restrictions for float formatting:
+  '%.67f' % 12.34 and '%.120e' % 12.34 no longer raise an exception.
+
  - Issue #1588: Add complex.__format__. For example, 
    format(complex(1, 2./3), '.5') now produces a sensible result.
  
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 3cea89921d192a586e6c6e05152c3bade2e061c0..31b9a73683c052d72660f2e6016576141ba4b88f 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8792,73 +8792,30 @@ getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
      return NULL;
  }
  
-static void
-strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
-{
-    register Py_ssize_t i;
-    for (i = len - 1; i >= 0; i--)
-        buffer[i] = (Py_UNICODE) charbuffer[i];
-}
+/* Returns a new reference to a PyUnicode object, or NULL on failure. */
  
-static int
-formatfloat(Py_UNICODE *buf,
-            size_t buflen,
-            int flags,
-            int prec,
-            int type,
-            PyObject *v)
-{
-    /* eric.smith: To minimize disturbances in PyUnicode_Format (the
-       only caller of this routine), I'm going to keep the existing
-       API to this function. That means that we'll allocate memory and
-       then copy back into the supplied buffer. But that's better than
-       all of the changes that would be required in PyUnicode_Format
-       because it does lots of memory management tricks. */
-
-    char* p = NULL;
-    int result = -1;
+static PyObject *
+formatfloat(PyObject *v, int flags, int prec, int type)
+{
+    char *p;
+    PyObject *result;
      double x;
-    Py_ssize_t len;
  
      x = PyFloat_AsDouble(v);
      if (x == -1.0 && PyErr_Occurred())
-        goto done;
+        return NULL;
+
      if (prec < 0)
          prec = 6;
  
-    /* make sure that the decimal representation of precision really does
-       need at most 10 digits: platforms with sizeof(int) == 8 exist! */
-    if (prec > 0x7fffffffL) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "outrageously large precision "
-                        "for formatted float");
-        goto done;
-    }
-
      if (type == 'f' && fabs(x) >= 1e50)
          type = 'g';
  
-    if (((type == 'g' || type == 'G') &&
-         buflen <= (size_t)10 + (size_t)prec) ||
-        ((type == 'f' || type == 'F') &&
-         buflen <= (size_t)53 + (size_t)prec)) {
-        PyErr_SetString(PyExc_OverflowError,
-                        "formatted float is too long (precision too large?)");
-        goto done;
-    }
-
      p = PyOS_double_to_string(x, type, prec,
                                (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
-    len = strlen(p);
-    if (len+1 >= buflen) {
-        /* Caller supplied buffer is not large enough. */
-        PyErr_NoMemory();
-        goto done;
-    }
-    strtounicode(buf, p, len);
-    result = Py_SAFE_DOWNCAST(len, Py_ssize_t, int);
-
-done:
+    if (p == NULL)
+        return NULL;
+    result = PyUnicode_FromStringAndSize(p, strlen(p));
      PyMem_Free(p);
      return result;
  }
@@ -8940,14 +8897,9 @@ formatchar(Py_UNICODE *buf,
  }
  
  /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
-
-   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
-   chars are formatted. XXX This is a magic number. Each formatting
-   routine does bounds checking to ensure no overflow, but a better
-   solution may be to malloc a buffer of appropriate size for each
-   format. For now, the current solution is sufficient.
+   FORMATBUFLEN is the length of the buffer in which chars are formatted.
  */
-#define FORMATBUFLEN (size_t)120
+#define FORMATBUFLEN (size_t)10
  
  PyObject *PyUnicode_Format(PyObject *format,
                             PyObject *args)
@@ -9012,7 +8964,7 @@ PyObject *PyUnicode_Format(PyObject *format,
              Py_UNICODE *pbuf;
              Py_UNICODE sign;
              Py_ssize_t len;
-            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
+            Py_UNICODE formatbuf[FORMATBUFLEN]; /* For formatchar() */
  
              fmt++;
              if (*fmt == '(') {
@@ -9257,11 +9209,11 @@ PyObject *PyUnicode_Format(PyObject *format,
              case 'F':
              case 'g':
              case 'G':
-                pbuf = formatbuf;
-                len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
-                                  flags, prec, c, v);
-                if (len < 0)
+                temp = formatfloat(v, flags, prec, c);
+                if (!temp)
                      goto onError;
+                pbuf = PyUnicode_AS_UNICODE(temp);
+                len = PyUnicode_GET_SIZE(temp);
                  sign = 1;
                  if (flags & F_ZERO)
                      fill = '0';
diff --git a/Python/pystrtod.c b/Python/pystrtod.c

index e68f5d79e0199c60daf1f65ef3cb9dd78a78d7cc..104061056e7110bac7f507235fb942a48b881418 100644 (file)
--- a/Python/pystrtod.c
+++ b/Python/pystrtod.c
@@ -620,12 +620,10 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                                           int flags,
                                           int *type)
  {
-       char buf[128];
         char format[32];
-       Py_ssize_t len;
-       char *result;
-       char *p;
-       int t;
+       Py_ssize_t bufsize;
+       char *buf;
+       int t, exp;
         int upper = 0;
  
         /* Validate format_code, and map upper and lower case */
@@ -669,6 +667,61 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                 return NULL;
         }
  
+       /* Here's a quick-and-dirty calculation to figure out how big a buffer
+          we need.  In general, for a finite float we need:
+
+            1 byte for each digit of the decimal significand, and
+
+            1 for a possible sign
+            1 for a possible decimal point
+            2 for a possible [eE][+-]
+            1 for each digit of the exponent;  if we allow 19 digits
+              total then we're safe up to exponents of 2**63.
+            1 for the trailing nul byte
+
+          This gives a total of 24 + the number of digits in the significand,
+          and the number of digits in the significand is:
+
+            for 'g' format: at most precision, except possibly
+              when precision == 0, when it's 1.
+            for 'e' format: precision+1
+            for 'f' format: precision digits after the point, at least 1
+              before.  To figure out how many digits appear before the point
+              we have to examine the size of the number.  If fabs(val) < 1.0
+              then there will be only one digit before the point.  If
+              fabs(val) >= 1.0, then there are at most
+
+                1+floor(log10(ceiling(fabs(val))))
+
+              digits before the point (where the 'ceiling' allows for the
+              possibility that the rounding rounds the integer part of val
+              up).  A safe upper bound for the above quantity is
+              1+floor(exp/3), where exp is the unique integer such that 0.5
+              <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
+              frexp.
+
+          So we allow room for precision+1 digits for all formats, plus an
+          extra floor(exp/3) digits for 'f' format.
+
+       */
+
+       if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
+               /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
+               bufsize = 5;
+       else {
+               bufsize = 25 + precision;
+               if (format_code == 'f' && fabs(val) >= 1.0) {
+                       frexp(val, &exp);
+                       bufsize += exp/3;
+               }
+       }
+
+       buf = PyMem_Malloc(bufsize);
+       if (buf == NULL) {
+               PyErr_NoMemory();
+               return NULL;
+       }
+
         /* Handle nan and inf. */
         if (Py_IS_NAN(val)) {
                 strcpy(buf, "nan");
@@ -687,38 +740,29 @@ PyAPI_FUNC(char *) PyOS_double_to_string(double val,
                 PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
                               (flags & Py_DTSF_ALT ? "#" : ""), precision,
                               format_code);
-               _PyOS_ascii_formatd(buf, sizeof(buf), format, val, precision);
-       }
-
-       len = strlen(buf);
-
-       /* Add 1 for the trailing 0 byte.
-          Add 1 because we might need to make room for the sign.
-          */
-       result = PyMem_Malloc(len + 2);
-       if (result == NULL) {
-               PyErr_NoMemory();
-               return NULL;
+               _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
         }
-       p = result;
  
         /* Add sign when requested.  It's convenient (esp. when formatting
          complex numbers) to include a sign even for inf and nan. */
-       if (flags & Py_DTSF_SIGN && buf[0] != '-')
-               *p++ = '+';
-
-       strcpy(p, buf);
-
+       if (flags & Py_DTSF_SIGN && buf[0] != '-') {
+               size_t len = strlen(buf);
+               /* the bufsize calculations above should ensure that we've got
+                  space to add a sign */
+               assert((size_t)bufsize >= len+2);
+               memmove(buf+1, buf, len+1);
+               buf[0] = '+';
+       }
         if (upper) {
                 /* Convert to upper case. */
                 char *p1;
-               for (p1 = p; *p1; p1++)
+               for (p1 = buf; *p1; p1++)
                         *p1 = Py_TOUPPER(*p1);
         }
  
         if (type)
                 *type = t;
-       return result;
+       return buf;
  }
  
  #else
author	Mark Dickinson <dickinsm@gmail.com>
	Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Fri, 1 May 2009 11:42:00 +0000 (11:42 +0000)
Lib/test/string_tests.py		patch | blob | history
Misc/NEWS		patch | blob | history
Objects/unicodeobject.c		patch | blob | history
Python/pystrtod.c		patch | blob | history