Issue #19638: Raise ValueError instead of crashing when converting billion character strings to float.

author Mark Dickinson <dickinsm@gmail.com>

Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)
diff --git a/Lib/test/test_strtod.py b/Lib/test/test_strtod.py

index 7bc595daf3aa7db9501c393040d9c408630cd34d..faf5b572a8de3b9215a22f4eb4727e7f1a7720e0 100644 (file)
--- a/Lib/test/test_strtod.py
+++ b/Lib/test/test_strtod.py
@@ -249,6 +249,37 @@ class StrtodTests(unittest.TestCase):
                      else:
                          assert False, "expected ValueError"
  
+    @test_support.bigmemtest(minsize=5 * test_support._1G, memuse=1)
+    def test_oversized_digit_strings(self, maxsize):
+        # Input string whose length doesn't fit in an INT.
+        s = "1." + "1" * int(2.2e9)
+        with self.assertRaises(ValueError):
+            float(s)
+        del s
+
+        s = "0." + "0" * int(2.2e9) + "1"
+        with self.assertRaises(ValueError):
+            float(s)
+        del s
+
+    def test_large_exponents(self):
+        # Verify that the clipping of the exponent in strtod doesn't affect the
+        # output values.
+        def positive_exp(n):
+            """ Long string with value 1.0 and exponent n"""
+            return '0.{}1e+{}'.format('0'*(n-1), n)
+
+        def negative_exp(n):
+            """ Long string with value 1.0 and exponent -n"""
+            return '1{}e-{}'.format('0'*n, n)
+
+        self.assertEqual(float(positive_exp(10000)), 1.0)
+        self.assertEqual(float(positive_exp(20000)), 1.0)
+        self.assertEqual(float(positive_exp(30000)), 1.0)
+        self.assertEqual(float(negative_exp(10000)), 1.0)
+        self.assertEqual(float(negative_exp(20000)), 1.0)
+        self.assertEqual(float(negative_exp(30000)), 1.0)
+
      def test_particular(self):
          # inputs that produced crashes or incorrectly rounded results with
          # previous versions of dtoa.c, for various reasons
diff --git a/Misc/NEWS b/Misc/NEWS

index f845ae36b95b7e354a2550d138830798ee1a0679..856daec7ece5b1f1c7bd46daf0918b3719ae7eee 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@ What's New in Python 2.7.7?
  Core and Builtins
  -----------------
  
+- Issue #19638: Fix possible crash / undefined behaviour from huge (more than 2
+  billion characters) input strings in _Py_dg_strtod.
+
  Library
  -------
  
diff --git a/Python/dtoa.c b/Python/dtoa.c

index 44dc01f1d5e453014ccf5fc46e73a7109d657acd..73e23af010c1731e1f53c1760472f332c57286b3 100644 (file)
--- a/Python/dtoa.c
+++ b/Python/dtoa.c
@@ -204,7 +204,24 @@ typedef union { double d; ULong L[2]; } U;
     MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP.  MAX_ABS_EXP
     should fit into an int. */
  #ifndef MAX_ABS_EXP
-#define MAX_ABS_EXP 19999U
+#define MAX_ABS_EXP 1100000000U
+#endif
+/* Bound on length of pieces of input strings in _Py_dg_strtod; specifically,
+   this is used to bound the total number of digits ignoring leading zeros and
+   the number of digits that follow the decimal point.  Ideally, MAX_DIGITS
+   should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the
+   exponent clipping in _Py_dg_strtod can't affect the value of the output. */
+#ifndef MAX_DIGITS
+#define MAX_DIGITS 1000000000U
+#endif
+
+/* Guard against trying to use the above values on unusual platforms with ints
+ * of width less than 32 bits. */
+#if MAX_ABS_EXP > INT_MAX
+#error "MAX_ABS_EXP should fit in an int"
+#endif
+#if MAX_DIGITS > INT_MAX
+#error "MAX_DIGITS should fit in an int"
  #endif
  
  /* The following definition of Storeinc is appropriate for MIPS processors.
@@ -1498,6 +1515,7 @@ _Py_dg_strtod(const char *s00, char **se)
      Long L;
      BCinfo bc;
      Bigint *bb, *bb1, *bd, *bd0, *bs, *delta;
+    size_t ndigits, fraclen;
  
      dval(&rv) = 0.;
  
@@ -1520,39 +1538,52 @@ _Py_dg_strtod(const char *s00, char **se)
          c = *++s;
      lz = s != s1;
  
-    /* Point s0 at the first nonzero digit (if any).  nd0 will be the position
-       of the point relative to s0.  nd will be the total number of digits
-       ignoring leading zeros. */
+    /* Point s0 at the first nonzero digit (if any).  fraclen will be the
+       number of digits between the decimal point and the end of the
+       digit string.  ndigits will be the total number of digits ignoring
+       leading zeros. */
      s0 = s1 = s;
      while ('0' <= c && c <= '9')
          c = *++s;
-    nd0 = nd = s - s1;
+    ndigits = s - s1;
+    fraclen = 0;
  
      /* Parse decimal point and following digits. */
      if (c == '.') {
          c = *++s;
-        if (!nd) {
+        if (!ndigits) {
              s1 = s;
              while (c == '0')
                  c = *++s;
              lz = lz || s != s1;
-            nd0 -= s - s1;
+            fraclen += (s - s1);
              s0 = s;
          }
          s1 = s;
          while ('0' <= c && c <= '9')
              c = *++s;
-        nd += s - s1;
+        ndigits += s - s1;
+        fraclen += s - s1;
+    }
+
+    /* Now lz is true if and only if there were leading zero digits, and
+       ndigits gives the total number of digits ignoring leading zeros.  A
+       valid input must have at least one digit. */
+    if (!ndigits && !lz) {
+        if (se)
+            *se = (char *)s00;
+        goto parse_error;
      }
  
-    /* Now lz is true if and only if there were leading zero digits, and nd
-       gives the total number of digits ignoring leading zeros.  A valid input
-       must have at least one digit. */
-    if (!nd && !lz) {
+    /* Range check ndigits and fraclen to make sure that they, and values
+       computed with them, can safely fit in an int. */
+    if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) {
          if (se)
              *se = (char *)s00;
          goto parse_error;
      }
+    nd = (int)ndigits;
+    nd0 = (int)ndigits - (int)fraclen;
  
      /* Parse exponent. */
      e = 0;
@@ -1886,20 +1917,20 @@ _Py_dg_strtod(const char *s00, char **se)
          bd2++;
  
          /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1,
-          and bs == 1, so:
+           and bs == 1, so:
  
                tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5)
                srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2)
-             0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
+              0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2)
  
-          It follows that:
+           It follows that:
  
                M * tdv = bd * 2**bd2 * 5**bd5
                M * srv = bb * 2**bb2 * 5**bb5
                M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5
  
-          for some constant M.  (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
-          this fact is not needed below.)
+           for some constant M.  (Actually, M == 2**(bb2 - bbe) * 5**bb5, but
+           this fact is not needed below.)
          */
  
          /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */
author	Mark Dickinson <dickinsm@gmail.com>
	Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Tue, 26 Nov 2013 16:38:25 +0000 (16:38 +0000)
Lib/test/test_strtod.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history
Python/dtoa.c		patch \| blob \| history