]> granicus.if.org Git - python/commitdiff
Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
author Serhiy Storchaka <storchaka@gmail.com>
Wed, 27 Apr 2016 20:13:46 +0000 (23:13 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Wed, 27 Apr 2016 20:13:46 +0000 (23:13 +0300)
for passing to open.  Original patch by Joseph Hackman.

Doc/library/fileinput.rst
Doc/whatsnew/3.6.rst
Lib/fileinput.py
Lib/test/test_fileinput.py
Misc/ACKS
Misc/NEWS

index 343368265b581509379eec533c1076dd67bb4406..8efe8e3b94ba4362ab5361f016d93468542abebb 100644 (file)
@@ -193,10 +193,14 @@ The two following opening hooks are provided by this module:
    Usage example:  ``fi = fileinput.FileInput(openhook=fileinput.hook_compressed)``
 
 
-.. function:: hook_encoded(encoding)
+.. function:: hook_encoded(encoding, errors=None)
 
    Returns a hook which opens each file with :func:`open`, using the given
-   *encoding* to read the file.
+   *encoding* and *errors* to read the file.
 
    Usage example: ``fi =
-   fileinput.FileInput(openhook=fileinput.hook_encoded("iso-8859-1"))``
+   fileinput.FileInput(openhook=fileinput.hook_encoded("utf-8",
+   "surrogateescape"))``
+
+   .. versionchanged:: 3.6
+      Added the optional *errors* parameter.
index 99223af31db596fe481e710de19713f1dd04df12..be4c01409f4fa3d67b00bef788dbff468eba5dd6 100644 (file)
@@ -358,6 +358,13 @@ The :func:`~zlib.compress` function now accepts keyword arguments.
 (Contributed by Aviv Palivoda in :issue:`26243`.)
 
 
+fileinput
+---------
+
+:func:`~fileinput.hook_encoded` now supports the *errors* argument.
+(Contributed by Joseph Hackman in :issue:`25788`.)
+
+
 Optimizations
 =============
 
index 1e19d242136d05c13afbca53f7b168c402bf95c6..721fe9c9612c17698ee939c900c2bedf7e59c3f5 100644 (file)
@@ -400,9 +400,9 @@ def hook_compressed(filename, mode):
         return open(filename, mode)
 
 
-def hook_encoded(encoding):
+def hook_encoded(encoding, errors=None):
     def openhook(filename, mode):
-        return open(filename, mode, encoding=encoding)
+        return open(filename, mode, encoding=encoding, errors=errors)
     return openhook
 
 
index 4f67c25f908e0980abc9dd3986bceb688b14b3df..565633fcccd97c61a240387906a0c05abb6b1be2 100644 (file)
@@ -945,7 +945,8 @@ class Test_hook_encoded(unittest.TestCase):
 
     def test(self):
         encoding = object()
-        result = fileinput.hook_encoded(encoding)
+        errors = object()
+        result = fileinput.hook_encoded(encoding, errors=errors)
 
         fake_open = InvocationRecorder()
         original_open = builtins.open
@@ -963,8 +964,26 @@ class Test_hook_encoded(unittest.TestCase):
         self.assertIs(args[0], filename)
         self.assertIs(args[1], mode)
         self.assertIs(kwargs.pop('encoding'), encoding)
+        self.assertIs(kwargs.pop('errors'), errors)
         self.assertFalse(kwargs)
 
+    def test_errors(self):
+        with open(TESTFN, 'wb') as f:
+            f.write(b'\x80abc')
+        self.addCleanup(safe_unlink, TESTFN)
+
+        def check(errors, expected_lines):
+            with FileInput(files=TESTFN, mode='r',
+                           openhook=hook_encoded('utf-8', errors=errors)) as fi:
+                lines = list(fi)
+            self.assertEqual(lines, expected_lines)
+
+        check('ignore', ['abc'])
+        with self.assertRaises(UnicodeDecodeError):
+            check('strict', ['abc'])
+        check('replace', ['\ufffdabc'])
+        check('backslashreplace', ['\\x80abc'])
+
     def test_modes(self):
         with open(TESTFN, 'wb') as f:
             # UTF-7 is a convenient, seldom used encoding
index dd3a56747fcb325de575f738a6d78567fe1418be..ebc3fc614652d587c9ab0930944cc756d727ef62 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -538,6 +538,7 @@ Michael Guravage
 Lars Gustäbel
 Thomas Güttler
 Jonas H.
+Joseph Hackman
 Barry Haddow
 Philipp Hagemeister
 Paul ten Hagen
index b6fb8f8f67619696a0257720c37a0f43126dd5e3..e68bbdf3a74391fed364acb0dec597b1fd84417d 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -256,6 +256,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #25788: fileinput.hook_encoded() now supports an "errors" argument
+  for passing to open.  Original patch by Joseph Hackman.
+
 - Issue #26634: recursive_repr() now sets __qualname__ of wrapper.  Patch by
   Xiang Zhang.