granicus.if.org Git - python/commitdiff
Issue #20501: fileinput module no longer reads whole file into memory when using
author Serhiy Storchaka <storchaka@gmail.com>
Wed, 26 Feb 2014 18:59:08 +0000 (20:59 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Wed, 26 Feb 2014 18:59:08 +0000 (20:59 +0200)
fileinput.hook_encoded.

Lib/fileinput.py
Lib/test/test_fileinput.py
Misc/NEWS

index 04e97bdb418e83dc9d12fc51ebc3a8dd77a131fc..21c2d1f9bb7604780aab22973fd4e1cff3bd3f4e 100644 (file)
--- a/Lib/fileinput.py
+++ b/Lib/fileinput.py
@@ -387,9 +387,10 @@ def hook_compressed(filename, mode):
 
 
 def hook_encoded(encoding):
-    import codecs
+    import io
     def openhook(filename, mode):
-        return codecs.open(filename, mode, encoding)
+        mode = mode.replace('U', '').replace('b', '') or 'r'
+        return io.open(filename, mode, encoding=encoding, newline='')
     return openhook
 
 
index 84aed1ab0ca5b0506500e0e362ce1d9df0d45baf..5e83a2aa25dab1a38f745d99dace2366e75f252a 100644 (file)
--- a/Lib/test/test_fileinput.py
+++ b/Lib/test/test_fileinput.py
@@ -218,8 +218,49 @@ class FileInputTests(unittest.TestCase):
         finally:
             remove_tempfiles(t1)
 
+    def test_readline(self):
+        with open(TESTFN, 'wb') as f:
+            f.write('A\nB\r\nC\r')
+            # Fill TextIOWrapper buffer.
+            f.write('123456789\n' * 1000)
+            # Issue #20501: readline() shouldn't read whole file.
+            f.write('\x80')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        fi = FileInput(files=TESTFN, openhook=hook_encoded('ascii'), bufsize=8)
+        self.assertEqual(fi.readline(), u'A\n')
+        self.assertEqual(fi.readline(), u'B\r\n')
+        self.assertEqual(fi.readline(), u'C\r')
+        with self.assertRaises(UnicodeDecodeError):
+            # Read to the end of file.
+            list(fi)
+        fi.close()
+
+class Test_hook_encoded(unittest.TestCase):
+    """Unit tests for fileinput.hook_encoded()"""
+
+    def test_modes(self):
+        # Unlikely UTF-7 is locale encoding
+        with open(TESTFN, 'wb') as f:
+            f.write('A\nB\r\nC\rD+IKw-')
+        t1 = TESTFN
+        #t1 = writeTmp(1, ['A\nB\r\nC\rD+IKw-'], mode='wb')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        def check(mode, expected_lines):
+            fi = FileInput(files=TESTFN, mode=mode,
+                           openhook=hook_encoded('utf-7'))
+            lines = list(fi)
+            fi.close()
+            self.assertEqual(lines, expected_lines)
+
+        check('r', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('rU', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('U', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+        check('rb', [u'A\n', u'B\r\n', u'C\r', u'D\u20ac'])
+
 def test_main():
-    run_unittest(BufferSizesTests, FileInputTests)
+    run_unittest(BufferSizesTests, FileInputTests, Test_hook_encoded)
 
 if __name__ == "__main__":
     test_main()
index 49d1630213b0a7716c4061ad72f63eba02bd2712..02175d053c0cbf942c5f876c40b916a3e8625e2c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -40,6 +40,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #20501: fileinput module no longer reads whole file into memory when using
+  fileinput.hook_encoded.
+
 - Issue #6815: os.path.expandvars() now supports non-ASCII Unicode environment
   variables names and values.