granicus.if.org Git - python/commitdiff
use Py_ssize_t for file offset and length computations in iteration (closes #22526)
author: Benjamin Peterson <benjamin@python.org>
Wed, 1 Oct 2014 01:17:15 +0000 (21:17 -0400)
committer: Benjamin Peterson <benjamin@python.org>
Wed, 1 Oct 2014 01:17:15 +0000 (21:17 -0400)
Lib/test/test_file2k.py
Misc/NEWS
Objects/fileobject.c

index fae1db6acc2eda5dd63ca82d075ede261d30c1de..14e5931b629b06538dfbbb5d47a10f2c9eb648a1 100644 (file)
--- a/Lib/test/test_file2k.py
+++ b/Lib/test/test_file2k.py
@@ -436,6 +436,18 @@ class OtherFileTests(unittest.TestCase):
         finally:
             f.close()
 
+    @test_support.precisionbigmemtest(2**31, 1)
+    def test_very_long_line(self, maxsize):
+        # Issue #22526: iterate over a file holding one 2**31-byte line.
+        with open(TESTFN, "wb") as fp:
+            fp.write("\0"*2**31)  # 2**31 NUL bytes, no newline: a single line
+        with open(TESTFN, "rb") as fp:
+            for l in fp:  # after the loop, l is the last line yielded
+                pass
+        self.assertEqual(len(l), 2**31)
+        self.assertEqual(l.count("\0"), 2**31)
+        l = None  # drop the reference to the 2**31-byte string
+
 class FileSubclassTests(unittest.TestCase):
 
     def testExit(self):
index 888abf5a10800eec35f3a309ca9247277e5d9d8b..07e8855882be1bb0c22e45258ac0718710a84da3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@ What's New in Python 2.7.9?
 Core and Builtins
 -----------------
 
+- Issue #22526: Fix iterating through files with lines longer than 2^31 bytes.
+
 - Issue #22519: Fix overflow checking in PyString_Repr.
 
 - Issue #22518: Fix integer overflow issues in latin-1 encoding.
index 559405869dcaed3e5467cd5dcc13d6d61a0130b7..55e074bcfdb3e9e00cbb561bbe941e02f1ae94af 100644 (file)
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -2236,7 +2236,7 @@ drop_readahead(PyFileObject *f)
    (unless at EOF) and no more than bufsize.  Returns negative value on
    error, will set MemoryError if bufsize bytes cannot be allocated. */
 static int
-readahead(PyFileObject *f, int bufsize)
+readahead(PyFileObject *f, Py_ssize_t bufsize)
 {
     Py_ssize_t chunksize;
 
@@ -2274,7 +2274,7 @@ readahead(PyFileObject *f, int bufsize)
    logarithmic buffer growth to about 50 even when reading a 1gb line. */
 
 static PyStringObject *
-readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
+readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize)
 {
     PyStringObject* s;
     char *bufptr;
@@ -2294,10 +2294,10 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
         bufptr++;                               /* Count the '\n' */
         len = bufptr - f->f_bufptr;
         s = (PyStringObject *)
-            PyString_FromStringAndSize(NULL, skip+len);
+            PyString_FromStringAndSize(NULL, skip + len);
         if (s == NULL)
             return NULL;
-        memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
+        memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len);
         f->f_bufptr = bufptr;
         if (bufptr == f->f_bufend)
             drop_readahead(f);
@@ -2305,14 +2305,13 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
         bufptr = f->f_bufptr;
         buf = f->f_buf;
         f->f_buf = NULL;                /* Force new readahead buffer */
-        assert(skip+len < INT_MAX);
-        s = readahead_get_line_skip(
-            f, (int)(skip+len), bufsize + (bufsize>>2) );
+        assert(len <= PY_SSIZE_T_MAX - skip);
+        s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2));
         if (s == NULL) {
             PyMem_Free(buf);
             return NULL;
         }
-        memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
+        memcpy(PyString_AS_STRING(s) + skip, bufptr, len);
         PyMem_Free(buf);
     }
     return s;