From 95bc0e4703f6997b808240b3b24635bb2bebd781 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Tue, 30 Sep 2014 21:17:15 -0400 Subject: [PATCH] use Py_ssize_t for file offset and length computations in iteration (closes #22526) --- Lib/test/test_file2k.py | 12 ++++++++++++ Misc/NEWS | 2 ++ Objects/fileobject.c | 15 +++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_file2k.py b/Lib/test/test_file2k.py index fae1db6acc..14e5931b62 100644 --- a/Lib/test/test_file2k.py +++ b/Lib/test/test_file2k.py @@ -436,6 +436,18 @@ class OtherFileTests(unittest.TestCase): finally: f.close() + @test_support.precisionbigmemtest(2**31, 1) + def test_very_long_line(self, maxsize): + # Issue #22526 + with open(TESTFN, "wb") as fp: + fp.write("\0"*2**31) + with open(TESTFN, "rb") as fp: + for l in fp: + pass + self.assertEqual(len(l), 2**31) + self.assertEqual(l.count("\0"), 2**31) + l = None + class FileSubclassTests(unittest.TestCase): def testExit(self): diff --git a/Misc/NEWS b/Misc/NEWS index 888abf5a10..07e8855882 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -10,6 +10,8 @@ What's New in Python 2.7.9? Core and Builtins ----------------- +- Issue #22526: Fix iterating through files with lines longer than 2^31 bytes. + - Issue #22519: Fix overflow checking in PyString_Repr. - Issue #22518: Fix integer overflow issues in latin-1 encoding. diff --git a/Objects/fileobject.c b/Objects/fileobject.c index 559405869d..55e074bcfd 100644 --- a/Objects/fileobject.c +++ b/Objects/fileobject.c @@ -2236,7 +2236,7 @@ drop_readahead(PyFileObject *f) (unless at EOF) and no more than bufsize. Returns negative value on error, will set MemoryError if bufsize bytes cannot be allocated. */ static int -readahead(PyFileObject *f, int bufsize) +readahead(PyFileObject *f, Py_ssize_t bufsize) { Py_ssize_t chunksize; @@ -2274,7 +2274,7 @@ readahead(PyFileObject *f, int bufsize) logarithmic buffer growth to about 50 even when reading a 1gb line. */ static PyStringObject * -readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) +readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize) { PyStringObject* s; char *bufptr; @@ -2294,10 +2294,10 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) bufptr++; /* Count the '\n' */ len = bufptr - f->f_bufptr; s = (PyStringObject *) - PyString_FromStringAndSize(NULL, skip+len); + PyString_FromStringAndSize(NULL, skip + len); if (s == NULL) return NULL; - memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len); + memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len); f->f_bufptr = bufptr; if (bufptr == f->f_bufend) drop_readahead(f); @@ -2305,14 +2305,13 @@ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) bufptr = f->f_bufptr; buf = f->f_buf; f->f_buf = NULL; /* Force new readahead buffer */ - assert(skip+len < INT_MAX); - s = readahead_get_line_skip( - f, (int)(skip+len), bufsize + (bufsize>>2) ); + assert(len <= PY_SSIZE_T_MAX - skip); + s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2)); if (s == NULL) { PyMem_Free(buf); return NULL; } - memcpy(PyString_AS_STRING(s)+skip, bufptr, len); + memcpy(PyString_AS_STRING(s) + skip, bufptr, len); PyMem_Free(buf); } return s; -- 2.50.1