Merged revisions 76295 via svnmerge from

author Mark Dickinson <dickinsm@gmail.com>

Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)

committer Mark Dickinson <dickinsm@gmail.com>

Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)
author Mark Dickinson <dickinsm@gmail.com>
Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)
committer Mark Dickinson <dickinsm@gmail.com>
Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)
diff --git a/Lib/test/test_xrange.py b/Lib/test/test_xrange.py

index 7e2a1a30581529e3a912a8fa84dd72be379ad396..dbfeb4383321bca8301e72834def25eb8a2bb0e0 100644 (file)
--- a/Lib/test/test_xrange.py
+++ b/Lib/test/test_xrange.py
@@ -3,12 +3,49 @@
  import test.test_support, unittest
  import sys
  import pickle
+import itertools
  
  import warnings
  warnings.filterwarnings("ignore", "integer argument expected",
                          DeprecationWarning, "unittest")
  
+# pure Python implementations (3 args only), for comparison
+def pyrange(start, stop, step):
+    if (start - stop) // step < 0:
+        # replace stop with next element in the sequence of integers
+        # that are congruent to start modulo step.
+        stop += (start - stop) % step
+        while start != stop:
+            yield start
+            start += step
+
+def pyrange_reversed(start, stop, step):
+    stop += (start - stop) % step
+    return pyrange(stop - step, start - step, -step)
+
+
  class XrangeTest(unittest.TestCase):
+    def assert_iterators_equal(self, xs, ys, test_id, limit=None):
+        # check that an iterator xs matches the expected results ys,
+        # up to a given limit.
+        if limit is not None:
+            xs = itertools.islice(xs, limit)
+            ys = itertools.islice(ys, limit)
+        sentinel = object()
+        pairs = itertools.izip_longest(xs, ys, fillvalue=sentinel)
+        for i, (x, y) in enumerate(pairs):
+            if x == y:
+                continue
+            elif x == sentinel:
+                self.fail('{0}: iterator ended unexpectedly '
+                          'at position {1}; expected {2}'.format(test_id, i, y))
+            elif y == sentinel:
+                self.fail('{0}: unexpected excess element {1} at '
+                          'position {2}'.format(test_id, x, i))
+            else:
+                self.fail('{0}: wrong element at position {1};'
+                          'expected {2}, got {3}'.format(test_id, i, y, x))
+
      def test_xrange(self):
          self.assertEqual(list(xrange(3)), [0, 1, 2])
          self.assertEqual(list(xrange(1, 5)), [1, 2, 3, 4])
@@ -67,6 +104,38 @@ class XrangeTest(unittest.TestCase):
                  self.assertEquals(list(pickle.loads(pickle.dumps(r, proto))),
                                    list(r))
  
+    def test_range_iterators(self):
+        # see issue 7298
+        limits = [base + jiggle
+                  for M in (2**32, 2**64)
+                  for base in (-M, -M//2, 0, M//2, M)
+                  for jiggle in (-2, -1, 0, 1, 2)]
+        test_ranges = [(start, end, step)
+                       for start in limits
+                       for end in limits
+                       for step in (-2**63, -2**31, -2, -1, 1, 2)]
+
+        for start, end, step in test_ranges:
+            try:
+                iter1 = xrange(start, end, step)
+            except OverflowError:
+                pass
+            else:
+                iter2 = pyrange(start, end, step)
+                test_id = "xrange({0}, {1}, {2})".format(start, end, step)
+                # check first 100 entries
+                self.assert_iterators_equal(iter1, iter2, test_id, limit=100)
+
+            try:
+                iter1 = reversed(xrange(start, end, step))
+            except OverflowError:
+                pass
+            else:
+                iter2 = pyrange_reversed(start, end, step)
+                test_id = "reversed(xrange({0}, {1}, {2}))".format(
+                    start, end, step)
+                self.assert_iterators_equal(iter1, iter2, test_id, limit=100)
+
  
  def test_main():
      test.test_support.run_unittest(XrangeTest)
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c

index 76d384914e52adbc255f48e8b2d345c094a475c8..0fee40f6841f9732117cc24f37130ea9808bfcac 100644 (file)
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -9,33 +9,32 @@ typedef struct {
         long    len;
  } rangeobject;
  
-/* Return number of items in range/xrange (lo, hi, step).  step > 0
- * required.  Return a value < 0 if & only if the true value is too
- * large to fit in a signed long.
+/* Return number of items in range (lo, hi, step).  step != 0
+ * required.  The result always fits in an unsigned long.
   */
-static long
+static unsigned long
  get_len_of_range(long lo, long hi, long step)
  {
-       /* -------------------------------------------------------------
-       If lo >= hi, the range is empty.
-       Else if n values are in the range, the last one is
-       lo + (n-1)*step, which must be <= hi-1.  Rearranging,
-       n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
-       the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
-       the RHS is non-negative and so truncation is the same as the
-       floor.  Letting M be the largest positive long, the worst case
-       for the RHS numerator is hi=M, lo=-M-1, and then
-       hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
-       precision to compute the RHS exactly.
-       ---------------------------------------------------------------*/
-       long n = 0;
-       if (lo < hi) {
-               unsigned long uhi = (unsigned long)hi;
-               unsigned long ulo = (unsigned long)lo;
-               unsigned long diff = uhi - ulo - 1;
-               n = (long)(diff / (unsigned long)step + 1);
-       }
-       return n;
+    /* -------------------------------------------------------------
+    If step > 0 and lo >= hi, or step < 0 and lo <= hi, the range is empty.
+    Else for step > 0, if n values are in the range, the last one is
+    lo + (n-1)*step, which must be <= hi-1.  Rearranging,
+    n <= (hi - lo - 1)/step + 1, so taking the floor of the RHS gives
+    the proper value.  Since lo < hi in this case, hi-lo-1 >= 0, so
+    the RHS is non-negative and so truncation is the same as the
+    floor.  Letting M be the largest positive long, the worst case
+    for the RHS numerator is hi=M, lo=-M-1, and then
+    hi-lo-1 = M-(-M-1)-1 = 2*M.  Therefore unsigned long has enough
+    precision to compute the RHS exactly.  The analysis for step < 0
+    is similar.
+    ---------------------------------------------------------------*/
+    assert(step != 0);
+    if (step > 0 && lo < hi)
+        return 1UL + (hi - 1UL - lo) / step;
+    else if (step < 0 && lo > hi)
+        return 1UL + (lo - 1UL - hi) / (0UL - step);
+    else
+        return 0UL;
  }
  
  static PyObject *
@@ -43,7 +42,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
  {
         rangeobject *obj;
         long ilow = 0, ihigh = 0, istep = 1;
-       long n;
+       unsigned long n;
  
         if (!_PyArg_NoKeywords("xrange()", kw))
                 return NULL;
@@ -64,11 +63,8 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
                 PyErr_SetString(PyExc_ValueError, "xrange() arg 3 must not be zero");
                 return NULL;
         }
-       if (istep > 0)
-               n = get_len_of_range(ilow, ihigh, istep);
-       else
-               n = get_len_of_range(ihigh, ilow, -istep);
-       if (n < 0) {
+       n = get_len_of_range(ilow, ihigh, istep);
+       if (n > (unsigned long)LONG_MAX || (long)n > PY_SSIZE_T_MAX) {
                 PyErr_SetString(PyExc_OverflowError,
                                 "xrange() result has too many items");
                 return NULL;
@@ -78,7 +74,7 @@ range_new(PyTypeObject *type, PyObject *args, PyObject *kw)
         if (obj == NULL)
                 return NULL;
         obj->start = ilow;
-       obj->len   = n;
+       obj->len   = (long)n;
         obj->step  = istep;
         return (PyObject *) obj;
  }
@@ -98,7 +94,9 @@ range_item(rangeobject *r, Py_ssize_t i)
                                 "xrange object index out of range");
                 return NULL;
         }
-       return PyInt_FromSsize_t(r->start + i * r->step);
+       /* do calculation entirely using unsigned longs, to avoid
+          undefined behaviour due to signed overflow. */
+       return PyInt_FromLong((long)(r->start + (unsigned long)i * r->step));
  }
  
  static Py_ssize_t
@@ -304,9 +302,21 @@ range_reverse(PyObject *seq)
         len = ((rangeobject *)seq)->len;
  
         it->index = 0;
-       it->start = start + (len-1) * step;
-       it->step = -step;
         it->len = len;
+       /* the casts below guard against signed overflow by turning it
+          into unsigned overflow instead.  The correctness of this
+          code still depends on conversion from unsigned long to long
+          wrapping modulo ULONG_MAX+1, which isn't guaranteed (see
+          C99 6.3.1.3p3) but seems to hold in practice for all
+          platforms we're likely to meet.
+
+          If step == LONG_MIN then we still end up with LONG_MIN
+          after negation; but this works out, since we've still got
+          the correct value modulo ULONG_MAX+1, and the range_item
+          calculation is also done modulo ULONG_MAX+1.
+       */
+       it->start = (long)(start + (unsigned long)(len-1) * step);
+       it->step = (long)(-(unsigned long)step);
  
         return (PyObject *)it;
  }
author	Mark Dickinson <dickinsm@gmail.com>
	Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)
committer	Mark Dickinson <dickinsm@gmail.com>
	Sun, 15 Nov 2009 12:34:12 +0000 (12:34 +0000)
Lib/test/test_xrange.py		patch \| blob \| history
Objects/rangeobject.c		patch \| blob \| history