granicus.if.org Git - python/commitdiff
closes bpo-34056: Always return bytes from _HackedGetData.get_data(). (GH-8130)
author: Benjamin Peterson <benjamin@python.org>
Sat, 7 Jul 2018 03:41:06 +0000 (20:41 -0700)
committer: GitHub <noreply@github.com>
Sat, 7 Jul 2018 03:41:06 +0000 (20:41 -0700)
* Always return bytes from _HackedGetData.get_data().

Ensure the imp.load_source shim always returns bytes by reopening the file in
binary mode if needed. Hash-based pycs have to receive the source code in bytes.

It's tempting to change imp.get_suffixes() to always return 'rb' as a mode, but
that breaks some stdlib tests and likely 3rdparty code, too.

Lib/imp.py
Lib/test/test_imp.py
Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst [new file with mode: 0644]

index 866464b245b24c5de4503a055813dcd6b63a741e..31f8c766381adc3c125fedcc704277b1f6fa7968 100644 (file)
@@ -142,17 +142,16 @@ class _HackedGetData:
     def get_data(self, path):
         """Gross hack to contort loader to deal w/ load_*()'s bad API."""
         if self.file and path == self.path:
+            # The contract of get_data() requires us to return bytes. Reopen the
+            # file in binary mode if needed.
             if not self.file.closed:
                 file = self.file
-            else:
-                self.file = file = open(self.path, 'r')
+                if 'b' not in file.mode:
+                    file.close()
+            if self.file.closed:
+                self.file = file = open(self.path, 'rb')
 
             with file:
-                # Technically should be returning bytes, but
-                # SourceLoader.get_code() just passed what is returned to
-                # compile() which can handle str. And converting to bytes would
-                # require figuring out the encoding to decode to and
-                # tokenize.detect_encoding() only accepts bytes.
                 return file.read()
         else:
             return super().get_data(path)
index a115e60d4e4f088a05adcc12cfe2a2550c74d200..bb0144b12d4107f7ddfc28070552b0c7e4584d03 100644 (file)
@@ -2,6 +2,7 @@ import importlib
 import importlib.util
 import os
 import os.path
+import py_compile
 import sys
 from test import support
 from test.support import script_helper
@@ -350,6 +351,20 @@ class ImportTests(unittest.TestCase):
             res = script_helper.assert_python_ok(*args)
             self.assertEqual(res.out.strip().decode('utf-8'), expected)
 
+    def test_find_and_load_checked_pyc(self):
+        # issue 34056
+        with support.temp_cwd():
+            with open('mymod.py', 'wb') as fp:
+                fp.write(b'x = 42\n')
+            py_compile.compile(
+                'mymod.py',
+                doraise=True,
+                invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH,
+            )
+            file, path, description = imp.find_module('mymod', path=['.'])
+            mod = imp.load_module('mymod', file, path, description)
+        self.assertEqual(mod.x, 42)
+
 
 class ReloadTests(unittest.TestCase):
 
diff --git a/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst b/Misc/NEWS.d/next/Library/2018-07-05-22-45-46.bpo-34056.86isrU.rst
new file mode 100644 (file)
index 0000000..edc0135
--- /dev/null
@@ -0,0 +1,3 @@
+Ensure the loader shim created by ``imp.load_module`` always returns bytes
+from its ``get_data()`` function. This fixes using ``imp.load_module`` with
+:pep:`552` hash-based pycs.