Issue #14700: Fix buggy overflow checks for large precision and width in new-style and old-style formatting.

author Mark Dickinson <mdickinson@enthought.com>

Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)

committer Mark Dickinson <mdickinson@enthought.com>

Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)
author Mark Dickinson <mdickinson@enthought.com>
Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)
committer Mark Dickinson <mdickinson@enthought.com>
Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py

index 2ecf3276b401343153b27a1589209eede09356c1..eb704ead4f9874dc280adbc1dc7a5c2dcbf1caec 100644 (file)
--- a/Lib/test/test_str.py
+++ b/Lib/test/test_str.py
@@ -35,6 +35,18 @@ class StrTest(
          string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
          self.assertRaises(OverflowError, '%c'.__mod__, 0x1234)
  
+    @test_support.cpython_only
+    def test_formatting_huge_precision(self):
+        from _testcapi import INT_MAX
+        format_string = "%.{}f".format(INT_MAX + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
+    def test_formatting_huge_width(self):
+        format_string = "%{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
      def test_conversion(self):
          # Make sure __str__() behaves properly
          class Foo0:
@@ -371,6 +383,21 @@ class StrTest(
          self.assertRaises(ValueError, format, "", "-")
          self.assertRaises(ValueError, "{0:=s}".format, '')
  
+    def test_format_huge_precision(self):
+        format_string = ".{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_width(self):
+        format_string = "{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_item_number(self):
+        format_string = "{{{}:.6f}}".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string.format(2.34)
+
      def test_format_auto_numbering(self):
          class C:
              def __init__(self, x=100):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index fda44da8888e51422f78acdd6db230c8ac1b817e..e44fe03d240d281fabc98d4e5d954e7f2d0aff07 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -644,6 +644,18 @@ class UnicodeTest(
                  return u'\u1234'
          self.assertEqual('%s' % Wrapper(), u'\u1234')
  
+    @test_support.cpython_only
+    def test_formatting_huge_precision(self):
+        from _testcapi import INT_MAX
+        format_string = u"%.{}f".format(INT_MAX + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
+    def test_formatting_huge_width(self):
+        format_string = u"%{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string % 2.34
+
      def test_startswith_endswith_errors(self):
          for meth in (u'foo'.startswith, u'foo'.endswith):
              with self.assertRaises(UnicodeDecodeError):
@@ -1556,6 +1568,21 @@ class UnicodeTest(
          #  will fail
          self.assertRaises(UnicodeEncodeError, "foo{0}".format, u'\u1000bar')
  
+    def test_format_huge_precision(self):
+        format_string = u".{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_width(self):
+        format_string = u"{}f".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format(2.34, format_string)
+
+    def test_format_huge_item_number(self):
+        format_string = u"{{{}:.6f}}".format(sys.maxsize + 1)
+        with self.assertRaises(ValueError):
+            result = format_string.format(2.34)
+
      def test_format_auto_numbering(self):
          class C:
              def __init__(self, x=100):
diff --git a/Misc/NEWS b/Misc/NEWS

index 9d29b429519bc28d949a494452842d07f08b179c..f384a2e8708040682d387c4757619f7b9a02c519 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,9 @@ What's New in Python 2.7.4
  Core and Builtins
  -----------------
  
+- Issue #14700: Fix buggy overflow checks for large width and precision
+  in string formatting operations.
+
  - Issue #6074: Ensure cached bytecode files can always be updated by the
    user that created them, even when the source file is read-only.
  
diff --git a/Objects/stringlib/formatter.h b/Objects/stringlib/formatter.h

index c49a1042ff602453beb770795b1b1c5328513b44..6b282249b1797110a149467952a91a9f6a83a14c 100644 (file)
--- a/Objects/stringlib/formatter.h
+++ b/Objects/stringlib/formatter.h
@@ -73,7 +73,7 @@ static int
  get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
                    Py_ssize_t *result)
  {
-    Py_ssize_t accumulator, digitval, oldaccumulator;
+    Py_ssize_t accumulator, digitval;
      int numdigits;
      accumulator = numdigits = 0;
      for (;;(*ptr)++, numdigits++) {
@@ -83,19 +83,17 @@ get_integer(STRINGLIB_CHAR **ptr, STRINGLIB_CHAR *end,
          if (digitval < 0)
              break;
          /*
-           This trick was copied from old Unicode format code.  It's cute,
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
-           expect too many digits.
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
          */
-        oldaccumulator = accumulator;
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
              PyErr_Format(PyExc_ValueError,
                           "Too many decimal digits in format string");
              return -1;
          }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
      }
      *result = accumulator;
      return numdigits;
diff --git a/Objects/stringlib/string_format.h b/Objects/stringlib/string_format.h

index 075fa1dee96df71057aaf7f2d2cbb5a4b126e197..965e1ad0ef3babe860ba0df2cebe89ca00d413e3 100644 (file)
--- a/Objects/stringlib/string_format.h
+++ b/Objects/stringlib/string_format.h
@@ -197,7 +197,6 @@ get_integer(const SubString *str)
  {
      Py_ssize_t accumulator = 0;
      Py_ssize_t digitval;
-    Py_ssize_t oldaccumulator;
      STRINGLIB_CHAR *p;
  
      /* empty string is an error */
@@ -209,19 +208,17 @@ get_integer(const SubString *str)
          if (digitval < 0)
              return -1;
          /*
-           This trick was copied from old Unicode format code.  It's cute,
-           but would really suck on an old machine with a slow divide
-           implementation.  Fortunately, in the normal case we do not
-           expect too many digits.
+           Detect possible overflow before it happens:
+
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
          */
-        oldaccumulator = accumulator;
-        accumulator *= 10;
-        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
              PyErr_Format(PyExc_ValueError,
                           "Too many decimal digits in format string");
              return -1;
          }
-        accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
      }
      return accumulator;
  }
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 39fa7404a7410e8c4a29cd87d763ddfcd582071c..152ea215f37ef4766d51fbf2bf4091caadda5a1e 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -4369,7 +4369,7 @@ PyString_Format(PyObject *format, PyObject *args)
                      c = Py_CHARMASK(*fmt++);
                      if (!isdigit(c))
                          break;
-                    if ((width*10) / 10 != width) {
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                          PyErr_SetString(
                              PyExc_ValueError,
                              "width too big");
@@ -4404,7 +4404,7 @@ PyString_Format(PyObject *format, PyObject *args)
                          c = Py_CHARMASK(*fmt++);
                          if (!isdigit(c))
                              break;
-                        if ((prec*10) / 10 != prec) {
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                              PyErr_SetString(
                                  PyExc_ValueError,
                                  "prec too big");
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index e3c2cb1e2a1bc91e34c4f2d7c2f8502c44b3ded9..79b87df7e7397b5478828ba7d7d4e0a53994695a 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -8394,7 +8394,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                      c = *fmt++;
                      if (c < '0' || c > '9')
                          break;
-                    if ((width*10) / 10 != width) {
+                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
                          PyErr_SetString(PyExc_ValueError,
                                          "width too big");
                          goto onError;
@@ -8427,7 +8427,7 @@ PyObject *PyUnicode_Format(PyObject *format,
                          c = *fmt++;
                          if (c < '0' || c > '9')
                              break;
-                        if ((prec*10) / 10 != prec) {
+                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
                              PyErr_SetString(PyExc_ValueError,
                                              "prec too big");
                              goto onError;
author	Mark Dickinson <mdickinson@enthought.com>
	Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)
committer	Mark Dickinson <mdickinson@enthought.com>
	Sun, 28 Oct 2012 10:00:46 +0000 (10:00 +0000)
Lib/test/test_str.py		patch | blob | history
Lib/test/test_unicode.py		patch | blob | history
Misc/NEWS		patch | blob | history
Objects/stringlib/formatter.h		patch | blob | history
Objects/stringlib/string_format.h		patch | blob | history
Objects/stringobject.c		patch | blob | history
Objects/unicodeobject.c		patch | blob | history