]> granicus.if.org Git - python/commitdiff
Forward-port of r52136: a review of overflow-detecting code.
authorArmin Rigo <arigo@tunes.org>
Wed, 4 Oct 2006 11:44:06 +0000 (11:44 +0000)
committerArmin Rigo <arigo@tunes.org>
Wed, 4 Oct 2006 11:44:06 +0000 (11:44 +0000)
* unified the way intobject, longobject and mystrtoul handle
  values around -sys.maxint-1.

* in general, trying to entirely avoid overflows in any computation
  involving signed ints or longs is extremely involved.  Fixed a few
  simple cases where a compiler might be too clever (but that's all
  guesswork).

* more overflow checks against bad data in marshal.c.

* 2.5 specific: fixed a number of places that were still confusing int
  and Py_ssize_t.  Some of them could potentially have caused
  "real-world" breakage.

* list.pop(x): fixing overflow issues on x was messy.  I just reverted
  to PyArg_ParseTuple("n"), which does the right thing.  (An obscure
  test was trying to give a Decimal to list.pop()... doesn't make
  sense any more IMHO)

* trying to write a few tests...

19 files changed:
Lib/test/list_tests.py
Lib/test/test_builtin.py
Lib/test/test_long.py
Misc/NEWS
Modules/cPickle.c
Objects/abstract.c
Objects/fileobject.c
Objects/intobject.c
Objects/listobject.c
Objects/longobject.c
Objects/stringobject.c
Objects/typeobject.c
Objects/unicodeobject.c
Python/errors.c
Python/getargs.c
Python/marshal.c
Python/modsupport.c
Python/mystrtoul.c
Python/sysmodule.c

index 14b54c77352a319d0d7707298ad4b22fcd323ef8..7c6623a22f83744063c5a90f612b9e84afa3b154 100644 (file)
@@ -269,7 +269,6 @@ class CommonTest(seq_tests.CommonTest):
         self.assertRaises(TypeError, a.insert)
 
     def test_pop(self):
-        from decimal import Decimal
         a = self.type2test([-1, 0, 1])
         a.pop()
         self.assertEqual(a, [-1, 0])
@@ -281,8 +280,6 @@ class CommonTest(seq_tests.CommonTest):
         self.assertRaises(IndexError, a.pop)
         self.assertRaises(TypeError, a.pop, 42, 42)
         a = self.type2test([0, 10, 20, 30, 40])
-        self.assertEqual(a.pop(Decimal(2)), 20)
-        self.assertRaises(IndexError, a.pop, Decimal(25))
 
     def test_remove(self):
         a = self.type2test([0, 0, 1])
index f3bdbe29c4f2b56f49d8f5d823bb577d770c0a97..f7cf8118e2fcea9367f363f59153d82f81cde4c3 100644 (file)
@@ -156,6 +156,11 @@ class BuiltinTest(unittest.TestCase):
         S = [10, 20, 30]
         self.assertEqual(any(x > 42 for x in S), False)
 
+    def test_neg(self):
+        x = -sys.maxint-1
+        self.assert_(isinstance(x, int))
+        self.assertEqual(-x, sys.maxint+1)
+
     def test_apply(self):
         def f0(*args):
             self.assertEqual(args, ())
@@ -702,9 +707,11 @@ class BuiltinTest(unittest.TestCase):
                         pass
 
         s = repr(-1-sys.maxint)
-        self.assertEqual(int(s)+1, -sys.maxint)
+        x = int(s)
+        self.assertEqual(x+1, -sys.maxint)
+        self.assert_(isinstance(x, int))
         # should return long
-        int(s[1:])
+        self.assertEqual(int(s[1:]), sys.maxint+1)
 
         # should return long
         x = int(1e100)
index 7b0c7b0798a49f25c2b7f6612ae67a0641a345ee..ae132adad93b2b3237846794e168d3bde93b502a 100644 (file)
@@ -247,17 +247,23 @@ class LongTest(unittest.TestCase):
             "long(-sys.maxint-1) != -sys.maxint-1")
 
         # long -> int should not fail for hugepos_aslong or hugeneg_aslong
+        x = int(hugepos_aslong)
         try:
-            self.assertEqual(int(hugepos_aslong), hugepos,
+            self.assertEqual(x, hugepos,
                   "converting sys.maxint to long and back to int fails")
         except OverflowError:
             self.fail("int(long(sys.maxint)) overflowed!")
+        if not isinstance(x, int):
+            raise TestFailed("int(long(sys.maxint)) should have returned int")
+        x = int(hugeneg_aslong)
         try:
-            self.assertEqual(int(hugeneg_aslong), hugeneg,
+            self.assertEqual(x, hugeneg,
                   "converting -sys.maxint-1 to long and back to int fails")
         except OverflowError:
             self.fail("int(long(-sys.maxint-1)) overflowed!")
-
+        if not isinstance(x, int):
+            raise TestFailed("int(long(-sys.maxint-1)) should have "
+                             "returned int")
         # but long -> int should overflow for hugepos+1 and hugeneg-1
         x = hugepos_aslong + 1
         try:
@@ -282,6 +288,17 @@ class LongTest(unittest.TestCase):
         self.assert_(type(y) is long,
             "overflowing int conversion must return long not long subtype")
 
+        # long -> Py_ssize_t conversion
+        class X(object):
+            def __getslice__(self, i, j):
+                return i, j
+
+        self.assertEqual(X()[-5L:7L], (-5, 7))
+        # use the clamping effect to test the smallest and largest longs
+        # that fit a Py_ssize_t
+        slicemin, slicemax = X()[-2L**100:2L**100]
+        self.assertEqual(X()[slicemin:slicemax], (slicemin, slicemax))
+
 # ----------------------------------- tests of auto int->long conversion
 
     def test_auto_overflow(self):
index e71be104e9f5785d044708de64f29f85b816099c..85587ebf3a69e8e6a05b14b71169c889c8b14d92 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,8 +12,14 @@ What's New in Python 2.5.1c1?
 Core and builtins
 -----------------
 
-- Integer negation and absolute value were fixed to not rely
-  on undefined behaviour of the C compiler anymore.
+- list.pop(x) accepts any object x following the __index__ protocol.
+
+- Fix some leftovers from the conversion from int to Py_ssize_t
+  (relevant to strings and sequences of more than 2**31 items).
+
+- A number of places, including integer negation and absolute value,
+  were fixed to not rely on undefined behaviour of the C compiler
+  anymore.
 
 - Bug #1566800: make sure that EnvironmentError can be called with any
   number of arguments, as was the case in Python 2.4.
index 24c98ccb225602ff51ce27c95986a10d079137f2..4f7d1f198afb84edd1f5eb815d8def0220b22861 100644 (file)
@@ -1024,7 +1024,7 @@ save_int(Picklerobject *self, PyObject *args)
 static int
 save_long(Picklerobject *self, PyObject *args)
 {
-       int size;
+       Py_ssize_t size;
        int res = -1;
        PyObject *repr = NULL;
 
@@ -1066,7 +1066,7 @@ save_long(Picklerobject *self, PyObject *args)
                 * byte at the start, and cut it back later if possible.
                 */
                nbytes = (nbits >> 3) + 1;
-               if ((int)nbytes < 0 || (size_t)(int)nbytes != nbytes) {
+               if (nbytes > INT_MAX) {
                        PyErr_SetString(PyExc_OverflowError, "long too large "
                                "to pickle");
                        goto finally;
@@ -1208,12 +1208,14 @@ save_string(Picklerobject *self, PyObject *args, int doput)
                        c_str[1] = size;
                        len = 2;
                }
-               else {
+               else if (size <= INT_MAX) {
                        c_str[0] = BINSTRING;
                        for (i = 1; i < 5; i++)
                                c_str[i] = (int)(size >> ((i - 1) * 8));
                        len = 5;
                }
+               else
+                       return -1;    /* string too large */
 
                if (self->write_func(self, c_str, len) < 0)
                        return -1;
@@ -1286,7 +1288,7 @@ modified_EncodeRawUnicodeEscape(const Py_UNICODE *s, int size)
 static int
 save_unicode(Picklerobject *self, PyObject *args, int doput)
 {
-       int size, len;
+       Py_ssize_t size, len;
        PyObject *repr=0;
 
        if (!PyUnicode_Check(args))
@@ -1325,6 +1327,8 @@ save_unicode(Picklerobject *self, PyObject *args, int doput)
 
                if ((size = PyString_Size(repr)) < 0)
                        goto err;
+               if (size > INT_MAX)
+                       return -1;   /* string too large */
 
                c_str[0] = BINUNICODE;
                for (i = 1; i < 5; i++)
index a18bb787eaba9bea167f4d0ddfbc7bb84ccc532a..7115c523c2daa417eaf5c787db5bbc2295145d0f 100644 (file)
@@ -1652,20 +1652,18 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
                if (cmp > 0) {
                        switch (operation) {
                        case PY_ITERSEARCH_COUNT:
-                               ++n;
-                               if (n <= 0) {
-                                       /* XXX(nnorwitz): int means ssize_t */
+                               if (n == PY_SSIZE_T_MAX) {
                                        PyErr_SetString(PyExc_OverflowError,
-                                               "count exceeds C int size");
+                                              "count exceeds C integer size");
                                        goto Fail;
                                }
+                               ++n;
                                break;
 
                        case PY_ITERSEARCH_INDEX:
                                if (wrapped) {
-                                       /* XXX(nnorwitz): int means ssize_t */
                                        PyErr_SetString(PyExc_OverflowError,
-                                               "index exceeds C int size");
+                                              "index exceeds C integer size");
                                        goto Fail;
                                }
                                goto Done;
@@ -1680,9 +1678,9 @@ _PySequence_IterSearch(PyObject *seq, PyObject *obj, int operation)
                }
 
                if (operation == PY_ITERSEARCH_INDEX) {
-                       ++n;
-                       if (n <= 0)
+                       if (n == PY_SSIZE_T_MAX)
                                wrapped = 1;
+                       ++n;
                }
        }
 
index b43bf85b87273b4f901ed8a5c46298c23e3ecfef..ced07684fd31222f5ddc31d0555dd79770196456 100644 (file)
@@ -1001,6 +1001,7 @@ getline_via_fgets(FILE *fp)
        size_t nfree;   /* # of free buffer slots; pvend-pvfree */
        size_t total_v_size;  /* total # of slots in buffer */
        size_t increment;       /* amount to increment the buffer */
+       size_t prev_v_size;
 
        /* Optimize for normal case:  avoid _PyString_Resize if at all
         * possible via first reading into stack buffer "buf".
@@ -1115,8 +1116,11 @@ getline_via_fgets(FILE *fp)
                /* expand buffer and try again */
                assert(*(pvend-1) == '\0');
                increment = total_v_size >> 2;  /* mild exponential growth */
+               prev_v_size = total_v_size;
                total_v_size += increment;
-               if (total_v_size > PY_SSIZE_T_MAX) {
+               /* check for overflow */
+               if (total_v_size <= prev_v_size ||
+                   total_v_size > PY_SSIZE_T_MAX) {
                        PyErr_SetString(PyExc_OverflowError,
                            "line is longer than a Python string can hold");
                        Py_DECREF(v);
@@ -1125,7 +1129,7 @@ getline_via_fgets(FILE *fp)
                if (_PyString_Resize(&v, (int)total_v_size) < 0)
                        return NULL;
                /* overwrite the trailing null byte */
-               pvfree = BUF(v) + (total_v_size - increment - 1);
+               pvfree = BUF(v) + (prev_v_size - 1);
        }
        if (BUF(v) + total_v_size != p)
                _PyString_Resize(&v, p - BUF(v));
index 28f760671078ee6699f3ee51e55f4a376cefdc26..a4d50be1294d59bdd10a8194ca92ef298cea3119 100644 (file)
@@ -546,6 +546,17 @@ int_mul(PyObject *v, PyObject *w)
        }
 }
 
+/* Integer overflow checking for unary negation: on a 2's-complement
+ * box, -x overflows iff x is the most negative long.  In this case we
+ * get -x == x.  However, -x is undefined (by C) if x /is/ the most
+ * negative long (it's a signed overflow case), and some compilers care.
+ * So we cast x to unsigned long first.  However, then other compilers
+ * warn about applying unary minus to an unsigned operand.  Hence the
+ * weird "0-".
+ */
+#define UNARY_NEG_WOULD_OVERFLOW(x)    \
+       ((x) < 0 && (unsigned long)(x) == 0-(unsigned long)(x))
+
 /* Return type of i_divmod */
 enum divmod_result {
        DIVMOD_OK,              /* Correct result */
@@ -564,14 +575,8 @@ i_divmod(register long x, register long y,
                                "integer division or modulo by zero");
                return DIVMOD_ERROR;
        }
-       /* (-sys.maxint-1)/-1 is the only overflow case.  x is the most
-        * negative long iff x < 0 and, on a 2's-complement box, x == -x.
-        * However, -x is undefined (by C) if x /is/ the most negative long
-        * (it's a signed overflow case), and some compilers care.  So we cast
-        * x to unsigned long first.  However, then other compilers warn about
-        * applying unary minus to an unsigned operand.  Hence the weird "0-".
-        */
-       if (y == -1 && x < 0 && (unsigned long)x == 0-(unsigned long)x)
+       /* (-sys.maxint-1)/-1 is the only overflow case. */
+       if (y == -1 && UNARY_NEG_WOULD_OVERFLOW(x))
                return DIVMOD_OVERFLOW;
        xdivy = x / y;
        xmody = x - xdivy * y;
@@ -762,7 +767,8 @@ int_neg(PyIntObject *v)
 {
        register long a;
        a = v->ob_ival;
-       if (a < 0 && (unsigned long)a == 0-(unsigned long)a) {
+        /* check for overflow */
+       if (UNARY_NEG_WOULD_OVERFLOW(a)) {
                PyObject *o = PyLong_FromLong(a);
                if (o != NULL) {
                        PyObject *result = PyNumber_Negative(o);
index ad276447d3f132f39d60caf492229752cc1ce29d..7afea121cdd552354cfa8cc9f9ea0eb80824a18a 100644 (file)
@@ -863,17 +863,12 @@ static PyObject *
 listpop(PyListObject *self, PyObject *args)
 {
        Py_ssize_t i = -1;
-       PyObject *v, *arg = NULL;
+       PyObject *v;
        int status;
 
-       if (!PyArg_UnpackTuple(args, "pop", 0, 1, &arg))
+       if (!PyArg_ParseTuple(args, "|n:pop", &i))
                return NULL;
-       if (arg != NULL) {
-               if (PyInt_Check(arg))
-                       i = PyInt_AS_LONG((PyIntObject*) arg);
-               else if (!PyArg_ParseTuple(args, "|n:pop", &i))
-                       return NULL;
-       }
+
        if (self->ob_size == 0) {
                /* Special-case most common failure cause */
                PyErr_SetString(PyExc_IndexError, "pop from empty list");
index e32c42566eb24cbf25497751db16702fed06d0f2..4d886cd7242b27eff3888f513125ed932c4eab8d 100644 (file)
@@ -193,6 +193,18 @@ PyLong_FromDouble(double dval)
        return (PyObject *)v;
 }
 
+/* Checking for overflow in PyLong_AsLong is a PITA since C doesn't define
+ * anything about what happens when a signed integer operation overflows,
+ * and some compilers think they're doing you a favor by being "clever"
+ * then.  The bit pattern for the largest postive signed long is
+ * (unsigned long)LONG_MAX, and for the smallest negative signed long
+ * it is abs(LONG_MIN), which we could write -(unsigned long)LONG_MIN.
+ * However, some other compilers warn about applying unary minus to an
+ * unsigned operand.  Hence the weird "0-".
+ */
+#define PY_ABS_LONG_MIN                (0-(unsigned long)LONG_MIN)
+#define PY_ABS_SSIZE_T_MIN     (0-(size_t)PY_SSIZE_T_MIN)
+
 /* Get a C long int from a long int object.
    Returns -1 and sets an error condition if overflow occurs. */
 
@@ -225,14 +237,16 @@ PyLong_AsLong(PyObject *vv)
                if ((x >> SHIFT) != prev)
                        goto overflow;
        }
-       /* Haven't lost any bits, but if the sign bit is set we're in
-        * trouble *unless* this is the min negative number.  So,
-        * trouble iff sign bit set && (positive || some bit set other
-        * than the sign bit).
-        */
-       if ((long)x < 0 && (sign > 0 || (x << 1) != 0))
-               goto overflow;
-       return (long)x * sign;
+       /* Haven't lost any bits, but casting to long requires extra care
+        * (see comment above).
+         */
+       if (x <= (unsigned long)LONG_MAX) {
+               return (long)x * sign;
+       }
+       else if (sign < 0 && x == PY_ABS_LONG_MIN) {
+               return LONG_MIN;
+       }
+       /* else overflow */
 
  overflow:
        PyErr_SetString(PyExc_OverflowError,
@@ -268,14 +282,16 @@ _PyLong_AsSsize_t(PyObject *vv) {
                if ((x >> SHIFT) != prev)
                        goto overflow;
        }
-       /* Haven't lost any bits, but if the sign bit is set we're in
-        * trouble *unless* this is the min negative number.  So,
-        * trouble iff sign bit set && (positive || some bit set other
-        * than the sign bit).
+       /* Haven't lost any bits, but casting to a signed type requires
+        * extra care (see comment above).
         */
-       if ((Py_ssize_t)x < 0 && (sign > 0 || (x << 1) != 0))
-               goto overflow;
-       return (Py_ssize_t)x * sign;
+       if (x <= (size_t)PY_SSIZE_T_MAX) {
+               return (Py_ssize_t)x * sign;
+       }
+       else if (sign < 0 && x == PY_ABS_SSIZE_T_MIN) {
+               return PY_SSIZE_T_MIN;
+       }
+       /* else overflow */
 
  overflow:
        PyErr_SetString(PyExc_OverflowError,
@@ -1167,7 +1183,7 @@ long_format(PyObject *aa, int base, int addL)
 {
        register PyLongObject *a = (PyLongObject *)aa;
        PyStringObject *str;
-       Py_ssize_t i;
+       Py_ssize_t i, j, sz;
        Py_ssize_t size_a;
        char *p;
        int bits;
@@ -1187,11 +1203,18 @@ long_format(PyObject *aa, int base, int addL)
                ++bits;
                i >>= 1;
        }
-       i = 5 + (addL ? 1 : 0) + (size_a*SHIFT + bits-1) / bits;
-       str = (PyStringObject *) PyString_FromStringAndSize((char *)0, i);
+       i = 5 + (addL ? 1 : 0);
+       j = size_a*SHIFT + bits-1;
+       sz = i + j / bits;
+       if (j / SHIFT < size_a || sz < i) {
+               PyErr_SetString(PyExc_OverflowError,
+                               "long is too large to format");
+               return NULL;
+       }
+       str = (PyStringObject *) PyString_FromStringAndSize((char *)0, sz);
        if (str == NULL)
                return NULL;
-       p = PyString_AS_STRING(str) + i;
+       p = PyString_AS_STRING(str) + sz;
        *p = '\0';
         if (addL)
                 *--p = 'L';
@@ -1305,7 +1328,7 @@ long_format(PyObject *aa, int base, int addL)
                } while ((*q++ = *p++) != '\0');
                q--;
                _PyString_Resize((PyObject **)&str,
-                                (int) (q - PyString_AS_STRING(str)));
+                                (Py_ssize_t) (q - PyString_AS_STRING(str)));
        }
        return (PyObject *)str;
 }
@@ -1363,14 +1386,14 @@ long_from_binary_base(char **str, int base)
        while (_PyLong_DigitValue[Py_CHARMASK(*p)] < base)
                ++p;
        *str = p;
-       n = (p - start) * bits_per_char;
-       if (n / bits_per_char != p - start) {
+       /* n <- # of Python digits needed, = ceiling(n/SHIFT). */
+       n = (p - start) * bits_per_char + SHIFT - 1;
+       if (n / bits_per_char < p - start) {
                PyErr_SetString(PyExc_ValueError,
                                "long string too large to convert");
                return NULL;
        }
-       /* n <- # of Python digits needed, = ceiling(n/SHIFT). */
-       n = (n + SHIFT - 1) / SHIFT;
+       n = n / SHIFT;
        z = _PyLong_New(n);
        if (z == NULL)
                return NULL;
index 4c2faf45437da314dcd30cce1892c7c3121e4866..aa2fd872ddb4bb99d67eeec8781704f9b66a8216 100644 (file)
@@ -804,10 +804,22 @@ string_print(PyStringObject *op, FILE *fp, int flags)
                return ret;
        }
        if (flags & Py_PRINT_RAW) {
+               char *data = op->ob_sval;
+               Py_ssize_t size = op->ob_size;
+               while (size > INT_MAX) {
+                       /* Very long strings cannot be written atomically.
+                        * But don't write exactly INT_MAX bytes at a time
+                        * to avoid memory aligment issues.
+                        */
+                       const int chunk_size = INT_MAX & ~0x3FFF;
+                       fwrite(data, 1, chunk_size, fp);
+                       data += chunk_size;
+                       size -= chunk_size;
+               }
 #ifdef __VMS
-                if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
+                if (size) fwrite(data, (int)size, 1, fp);
 #else
-                fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
+                fwrite(data, 1, (int)size, fp);
 #endif
                return 0;
        }
@@ -844,7 +856,7 @@ PyString_Repr(PyObject *obj, int smartquotes)
        register PyStringObject* op = (PyStringObject*) obj;
        size_t newsize = 2 + 4 * op->ob_size;
        PyObject *v;
-       if (newsize > PY_SSIZE_T_MAX) {
+       if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
                PyErr_SetString(PyExc_OverflowError,
                        "string is too large to make repr");
        }
@@ -4237,7 +4249,7 @@ _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
                return NULL;
        }
        llen = PyString_Size(result);
-       if (llen > PY_SSIZE_T_MAX) {
+       if (llen > INT_MAX) {
                PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
                return NULL;
        }
@@ -4726,9 +4738,10 @@ PyString_Format(PyObject *format, PyObject *args)
                        default:
                                PyErr_Format(PyExc_ValueError,
                                  "unsupported format character '%c' (0x%x) "
-                                 "at index %i",
+                                 "at index %zd",
                                  c, c,
-                                 (int)(fmt - 1 - PyString_AsString(format)));
+                                 (Py_ssize_t)(fmt - 1 -
+                                              PyString_AsString(format)));
                                goto error;
                        }
                        if (sign) {
index 4d99f7d67f9b460c795076553a7e1a939b66217a..d4a46c37f02e122dca5a31b0f62cbb824f4de3c1 100644 (file)
@@ -98,7 +98,7 @@ type_module(PyTypeObject *type, void *context)
                s = strrchr(type->tp_name, '.');
                if (s != NULL)
                        return PyString_FromStringAndSize(
-                               type->tp_name, (int)(s - type->tp_name));
+                           type->tp_name, (Py_ssize_t)(s - type->tp_name));
                return PyString_FromString("__builtin__");
        }
 }
@@ -4116,19 +4116,10 @@ slot_sq_length(PyObject *self)
                return -1;
        len = PyInt_AsSsize_t(res);
        Py_DECREF(res);
-       if (len == -1 && PyErr_Occurred())
-               return -1;
-#if SIZEOF_SIZE_T < SIZEOF_INT
-       /* Overflow check -- range of PyInt is more than C ssize_t */
-       if (len != (int)len) {
-               PyErr_SetString(PyExc_OverflowError,
-                       "__len__() should return 0 <= outcome < 2**31");
-               return -1;
-       }
-#endif
        if (len < 0) {
-               PyErr_SetString(PyExc_ValueError,
-                               "__len__() should return >= 0");
+               if (!PyErr_Occurred())
+                       PyErr_SetString(PyExc_ValueError,
+                                       "__len__() should return >= 0");
                return -1;
        }
        return len;
index 2ae3f61d230054326c4b10e4639fd7c245522484..00f2018da0c551c43da0a7e1aae7a0ded7c5aeac 100644 (file)
@@ -2380,6 +2380,7 @@ PyObject *_PyUnicode_DecodeUnicodeInternal(const char *s,
     Py_UNICODE unimax = PyUnicode_GetMax();
 #endif
 
+    /* XXX overflow detection missing */
     v = _PyUnicode_New((size+Py_UNICODE_SIZE-1)/ Py_UNICODE_SIZE);
     if (v == NULL)
        goto onError;
@@ -3166,6 +3167,7 @@ PyObject *PyUnicode_DecodeCharmap(const char *s,
                        Py_ssize_t needed = (targetsize - extrachars) + \
                                     (targetsize << 2);
                        extrachars += needed;
+                       /* XXX overflow detection missing */
                        if (_PyUnicode_Resize(&v,
                                             PyUnicode_GET_SIZE(v) + needed) < 0) {
                            Py_DECREF(x);
@@ -7758,10 +7760,11 @@ PyObject *PyUnicode_Format(PyObject *format,
            default:
                PyErr_Format(PyExc_ValueError,
                             "unsupported format character '%c' (0x%x) "
-                            "at index %i",
+                            "at index %zd",
                             (31<=c && c<=126) ? (char)c : '?',
                              (int)c,
-                            (int)(fmt -1 - PyUnicode_AS_UNICODE(uformat)));
+                            (Py_ssize_t)(fmt - 1 -
+                                         PyUnicode_AS_UNICODE(uformat)));
                goto onError;
            }
            if (sign) {
index 7b7105104ad719c5c5d561636c559ca2ffea6014..66a734eb15c7a07610463111ae970ec663cde399 100644 (file)
@@ -551,7 +551,8 @@ PyErr_NewException(char *name, PyObject *base, PyObject *dict)
                        goto failure;
        }
        if (PyDict_GetItemString(dict, "__module__") == NULL) {
-               modulename = PyString_FromStringAndSize(name, (int)(dot-name));
+               modulename = PyString_FromStringAndSize(name,
+                                                    (Py_ssize_t)(dot-name));
                if (modulename == NULL)
                        goto failure;
                if (PyDict_SetItemString(dict, "__module__", modulename) != 0)
index 3fca9cdfce358e3f3c6c2076ee5d4f1e34e53983..d6255986327cd7b7d133ab1e5b16ea5b51801241 100644 (file)
@@ -815,7 +815,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
 #endif
                        else
                                return converterr("string", arg, msgbuf, bufsize);
-                       if ((int)strlen(*p) != PyString_Size(arg))
+                       if ((Py_ssize_t)strlen(*p) != PyString_Size(arg))
                                return converterr("string without null bytes",
                                                  arg, msgbuf, bufsize);
                }
@@ -882,7 +882,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                                format++;
                        }
                        else if (*p != NULL &&
-                                (int)strlen(*p) != PyString_Size(arg))
+                                (Py_ssize_t)strlen(*p) != PyString_Size(arg))
                                return converterr(
                                        "string without null bytes or None", 
                                        arg, msgbuf, bufsize);
@@ -1029,7 +1029,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                           PyMem_Free()ing it after usage
 
                        */
-                       if ((int)strlen(PyString_AS_STRING(s)) != size) {
+                       if ((Py_ssize_t)strlen(PyString_AS_STRING(s))
+                                                               != size) {
                                Py_DECREF(s);
                                return converterr(
                                        "(encoded string without NULL bytes)",
index c3bc87f21f04c86d676b14ad55e1c69af003eb2b..776836e557574428e4855de0d1db9c14bb8fe5de 100644 (file)
@@ -546,6 +546,11 @@ r_object(RFILE *p)
                        int size;
                        PyLongObject *ob;
                        n = r_long(p);
+                       if (n < -INT_MAX || n > INT_MAX) {
+                               PyErr_SetString(PyExc_ValueError,
+                                               "bad marshal data");
+                               return NULL;
+                       }
                        size = n<0 ? -n : n;
                        ob = _PyLong_New(size);
                        if (ob == NULL)
@@ -654,7 +659,7 @@ r_object(RFILE *p)
        case TYPE_INTERNED:
        case TYPE_STRING:
                n = r_long(p);
-               if (n < 0) {
+               if (n < 0 || n > INT_MAX) {
                        PyErr_SetString(PyExc_ValueError, "bad marshal data");
                        return NULL;
                }
@@ -689,7 +694,7 @@ r_object(RFILE *p)
                char *buffer;
 
                n = r_long(p);
-               if (n < 0) {
+               if (n < 0 || n > INT_MAX) {
                        PyErr_SetString(PyExc_ValueError, "bad marshal data");
                        return NULL;
                }
@@ -710,7 +715,7 @@ r_object(RFILE *p)
 
        case TYPE_TUPLE:
                n = r_long(p);
-               if (n < 0) {
+               if (n < 0 || n > INT_MAX) {
                        PyErr_SetString(PyExc_ValueError, "bad marshal data");
                        return NULL;
                }
@@ -733,7 +738,7 @@ r_object(RFILE *p)
 
        case TYPE_LIST:
                n = r_long(p);
-               if (n < 0) {
+               if (n < 0 || n > INT_MAX) {
                        PyErr_SetString(PyExc_ValueError, "bad marshal data");
                        return NULL;
                }
@@ -831,10 +836,11 @@ r_object(RFILE *p)
                        
                        v = NULL;
 
-                       argcount = r_long(p);
-                       nlocals = r_long(p);
-                       stacksize = r_long(p);
-                       flags = r_long(p);
+                        /* XXX ignore long->int overflows for now */
+                       argcount = (int)r_long(p);
+                       nlocals = (int)r_long(p);
+                       stacksize = (int)r_long(p);
+                       flags = (int)r_long(p);
                        code = r_object(p);
                        if (code == NULL)
                                goto code_error;
@@ -859,7 +865,7 @@ r_object(RFILE *p)
                        name = r_object(p);
                        if (name == NULL)
                                goto code_error;
-                       firstlineno = r_long(p);
+                       firstlineno = (int)r_long(p);
                        lnotab = r_object(p);
                        if (lnotab == NULL)
                                goto code_error;
@@ -1031,10 +1037,16 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
        wf.strings = (version > 0) ? PyDict_New() : NULL;
        w_object(x, &wf);
        Py_XDECREF(wf.strings);
-       if (wf.str != NULL)
-               _PyString_Resize(&wf.str,
-                   (int) (wf.ptr -
-                          PyString_AS_STRING((PyStringObject *)wf.str)));
+       if (wf.str != NULL) {
+               char *base = PyString_AS_STRING((PyStringObject *)wf.str);
+               if (wf.ptr - base > PY_SSIZE_T_MAX) {
+                       Py_DECREF(wf.str);
+                       PyErr_SetString(PyExc_OverflowError,
+                                       "too much marshall data for a string");
+                       return NULL;
+               }
+               _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base));
+       }
        if (wf.error) {
                Py_XDECREF(wf.str);
                PyErr_SetString(PyExc_ValueError,
index e291014ca4acd36328741388d3861a77bbd070fd..1aa3df2852c2226d084a6acb8cc4c47b281f5afc 100644 (file)
@@ -421,7 +421,7 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags)
                                                        "string too long for Python string");
                                                return NULL;
                                        }
-                                       n = (int)m;
+                                       n = (Py_ssize_t)m;
                                }
                                v = PyString_FromStringAndSize(str, n);
                        }
index 0dda4be2ee828b387fda3044f7cf09a520e95ffb..f0070575dbf68bdba5fb56cf945cd3ee7ffb3710 100644 (file)
@@ -195,13 +195,10 @@ overflowed:
        return (unsigned long)-1;
 }
 
-/* Checking for overflow in PyOS_strtol is a PITA since C doesn't define
- * anything about what happens when a signed integer operation overflows,
- * and some compilers think they're doing you a favor by being "clever"
- * then.  Python assumes a 2's-complement representation, so that the bit
- * pattern for the largest postive signed long is LONG_MAX, and for
- * the smallest negative signed long is LONG_MAX + 1.
+/* Checking for overflow in PyOS_strtol is a PITA; see comments
+ * about PY_ABS_LONG_MIN in longobject.c.
  */
+#define PY_ABS_LONG_MIN                (0-(unsigned long)LONG_MIN)
 
 long
 PyOS_strtol(char *str, char **ptr, int base)
@@ -224,8 +221,7 @@ PyOS_strtol(char *str, char **ptr, int base)
                if (sign == '-')
                        result = -result;
        }
-       else if (sign == '-' && uresult == (unsigned long)LONG_MAX + 1) {
-               assert(LONG_MIN == -LONG_MAX-1);
+       else if (sign == '-' && uresult == PY_ABS_LONG_MIN) {
                result = LONG_MIN;
        }
        else {
index 2dbe2839a681cf82b06d9dc7ebf0a248878286f1..6fbaba50252b2a30a53740bdba926eff2ebd2856 100644 (file)
@@ -1225,7 +1225,7 @@ makepathobject(char *path, int delim)
                p = strchr(path, delim);
                if (p == NULL)
                        p = strchr(path, '\0'); /* End of string */
-               w = PyString_FromStringAndSize(path, (int) (p - path));
+               w = PyString_FromStringAndSize(path, (Py_ssize_t) (p - path));
                if (w == NULL) {
                        Py_DECREF(v);
                        return NULL;