Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.
author Antoine Pitrou <solipsis@pitrou.net>
Mon, 20 Feb 2012 00:48:16 +0000 (01:48 +0100)
committer Antoine Pitrou <solipsis@pitrou.net>
Mon, 20 Feb 2012 00:48:16 +0000 (01:48 +0100)
importlib is now often faster than imp.find_module() at finding modules.

Doc/library/importlib.rst
Lib/importlib/__init__.py
Lib/importlib/_bootstrap.py
Lib/importlib/test/import_/test_path.py
Lib/test/test_import.py
Lib/test/test_reprlib.py
Misc/NEWS

index 6dd13cc7d4f9854c65bf00a36e5f1af824ff3f43..65b63c8753124d59f35bcd1f9b7f2c43947f895a 100644 (file)
@@ -86,6 +86,14 @@ Functions
     that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
     top-level package or module (e.g. ``pkg``).
 
+.. function:: invalidate_caches()
+
+   Invalidate importlib's internal caches.  Calling this function may be
+   needed if some modules are installed while your program is running and
+   you expect the program to notice the changes.
+
+   .. versionadded:: 3.3
+
 
 :mod:`importlib.abc` -- Abstract base classes related to import
 ---------------------------------------------------------------
index 940a9a224113b83b258c492955cb8bb47d4a171f..50e9391a6a5ae805ff4d485106dba139f6a20dcd 100644 (file)
@@ -18,7 +18,7 @@ References on import:
           http://www.python.org/dev/peps/pep-0328
 
 """
-__all__ = ['__import__', 'import_module']
+__all__ = ['__import__', 'import_module', 'invalidate_caches']
 
 from . import _bootstrap
 
@@ -37,7 +37,7 @@ _bootstrap._setup(sys, imp)
 
 # Public API #########################################################
 
-from ._bootstrap import __import__
+from ._bootstrap import __import__, invalidate_caches
 
 
 def import_module(name, package=None):
index 1dfc3033b5c5a2ed8ff6bce6c9d4edbba72fa1e4..f69093747c0b677e938298f45358ad9c33e8c03a 100644 (file)
@@ -21,31 +21,16 @@ work. One should use importlib as the public-facing version of this module.
 
 CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
 
-def _case_insensitive_ok(directory, check):
-    """Check if the directory contains something matching 'check' exists in the
-    directory.
 
-    If PYTHONCASEOK is a defined environment variable then skip the
-    case-sensitivity check.
-
-    """
-    if b'PYTHONCASEOK' not in _os.environ:
-        if not directory:
-            directory = '.'
-        return check in _os.listdir(directory)
+def _relax_case():
+    """True if filenames must be checked case-insensitively."""
+    if any(map(sys.platform.startswith, CASE_INSENSITIVE_PLATFORMS)):
+        def _relax_case():
+            return b'PYTHONCASEOK' in _os.environ
     else:
-        return True
-
-def _case_sensitive_ok(directory, check):
-    """Under case-sensitive filesystems always assume the case matches.
-
-    Since other code does the file existence check, that subsumes a
-    case-sensitivity check.
-
-    """
-    return True
-
-_case_ok = None
+        def _relax_case():
+            return False
+    return _relax_case
 
 
 # TODO: Expose from marshal
@@ -172,6 +157,18 @@ code_type = type(_wrap.__code__)
 
 # Finder/loader utility code ##################################################
 
+_cache_refresh = 0
+
+def invalidate_caches():
+    """Invalidate importlib's internal caches.
+
+    Calling this function may be needed if some modules are installed while
+    your program is running and you expect the program to notice the changes.
+    """
+    global _cache_refresh
+    _cache_refresh += 1
+
+
 def set_package(fxn):
     """Set __package__ on the returned module."""
     def set_package_wrapper(*args, **kwargs):
@@ -708,7 +705,7 @@ class PathFinder:
 
         """
         if path == '':
-            path = _os.getcwd()
+            path = '.'
         try:
             finder = sys.path_importer_cache[path]
         except KeyError:
@@ -760,29 +757,55 @@ class _FileFinder:
                                 for suffix in detail.suffixes)
         self.packages = packages
         self.modules = modules
-        self.path = path
+        # Base (directory) path
+        self.path = path or '.'
+        self._path_mtime = -1
+        self._path_cache = set()
+        self._cache_refresh = 0
 
     def find_module(self, fullname):
         """Try to find a loader for the specified module."""
         tail_module = fullname.rpartition('.')[2]
-        base_path = _path_join(self.path, tail_module)
-        if _path_isdir(base_path) and _case_ok(self.path, tail_module):
-            for suffix, loader in self.packages:
-                init_filename = '__init__' + suffix
-                full_path = _path_join(base_path, init_filename)
-                if (_path_isfile(full_path) and
-                        _case_ok(base_path, init_filename)):
-                    return loader(fullname, full_path)
-            else:
-                msg = "Not importing directory {}: missing __init__"
-                _warnings.warn(msg.format(base_path), ImportWarning)
+        if _relax_case():
+            tail_module = tail_module.lower()
+        try:
+            mtime = _os.stat(self.path).st_mtime
+        except OSError:
+            mtime = -1
+        if mtime != self._path_mtime or _cache_refresh != self._cache_refresh:
+            self._fill_cache()
+            self._path_mtime = mtime
+            self._cache_refresh = _cache_refresh
+        cache = self._path_cache
+        if tail_module in cache:
+            base_path = _path_join(self.path, tail_module)
+            if _path_isdir(base_path):
+                for suffix, loader in self.packages:
+                    init_filename = '__init__' + suffix
+                    full_path = _path_join(base_path, init_filename)
+                    if _path_isfile(full_path):
+                        return loader(fullname, full_path)
+                else:
+                    msg = "Not importing directory {}: missing __init__"
+                    _warnings.warn(msg.format(base_path), ImportWarning)
         for suffix, loader in self.modules:
             mod_filename = tail_module + suffix
-            full_path = _path_join(self.path, mod_filename)
-            if _path_isfile(full_path) and _case_ok(self.path, mod_filename):
-                return loader(fullname, full_path)
+            if mod_filename in cache:
+                full_path = _path_join(self.path, mod_filename)
+                if _path_isfile(full_path):
+                    return loader(fullname, full_path)
         return None
 
+    def _fill_cache(self):
+        """Fill the cache of potential modules and packages for this directory."""
+        path = self.path
+        contents = _os.listdir(path)
+        if _relax_case():
+            self._path_cache = set(fn.lower() for fn in contents)
+        else:
+            self._path_cache = set(contents)
+
+
 class _SourceFinderDetails:
 
     loader = _SourceFileLoader
@@ -1060,7 +1083,7 @@ def _setup(sys_module, imp_module):
     modules, those two modules must be explicitly passed in.
 
     """
-    global _case_ok, imp, sys
+    global imp, sys
     imp = imp_module
     sys = sys_module
 
@@ -1093,12 +1116,8 @@ def _setup(sys_module, imp_module):
         raise ImportError('importlib requires posix or nt')
     setattr(self_module, '_os', os_module)
     setattr(self_module, 'path_sep', path_sep)
-
-    if any(sys_module.platform.startswith(x)
-            for x in CASE_INSENSITIVE_PLATFORMS):
-        _case_ok = _case_insensitive_ok
-    else:
-        _case_ok = _case_sensitive_ok
+    # Constants
+    setattr(self_module, '_relax_case', _relax_case())
 
 
 def _install(sys_module, imp_module):
index 61fe2260d4f2458ff4be86ef58d1a2292de82521..57133196126a69cd00d6fb162f824b2efa007776 100644 (file)
@@ -78,11 +78,11 @@ class FinderTests(unittest.TestCase):
         path = ''
         module = '<test module>'
         importer = util.mock_modules(module)
-        hook = import_util.mock_path_hook(os.getcwd(), importer=importer)
+        hook = import_util.mock_path_hook(os.curdir, importer=importer)
         with util.import_state(path=[path], path_hooks=[hook]):
             loader = machinery.PathFinder.find_module(module)
             self.assertIs(loader, importer)
-            self.assertIn(os.getcwd(), sys.path_importer_cache)
+            self.assertIn(os.curdir, sys.path_importer_cache)
 
 
 class DefaultPathFinderTests(unittest.TestCase):
index 62fdf4ba6e14925875a69dc69c02185126a78c44..bd2da7210559d81c4f5e9b3f80053f38a425c907 100644 (file)
@@ -2,6 +2,7 @@ import builtins
 import imp
 from importlib.test.import_ import test_relative_imports
 from importlib.test.import_ import util as importlib_util
+import importlib
 import marshal
 import os
 import platform
@@ -34,6 +35,7 @@ class ImportTests(unittest.TestCase):
 
     def setUp(self):
         remove_files(TESTFN)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         unload(TESTFN)
@@ -107,6 +109,7 @@ class ImportTests(unittest.TestCase):
                 create_empty_file(fname)
                 fn = imp.cache_from_source(fname)
                 unlink(fn)
+                importlib.invalidate_caches()
                 __import__(TESTFN)
                 if not os.path.exists(fn):
                     self.fail("__import__ did not result in creation of "
@@ -260,6 +263,7 @@ class ImportTests(unittest.TestCase):
             os.remove(source)
             del sys.modules[TESTFN]
             make_legacy_pyc(source)
+            importlib.invalidate_caches()
             mod = __import__(TESTFN)
             base, ext = os.path.splitext(mod.__file__)
             self.assertIn(ext, ('.pyc', '.pyo'))
@@ -358,6 +362,7 @@ func_filename = func.__code__.co_filename
         with open(self.file_name, "w") as f:
             f.write(self.module_source)
         sys.path.insert(0, self.dir_name)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         sys.path[:] = self.sys_path
@@ -552,6 +557,7 @@ class PycacheTests(unittest.TestCase):
         with open(self.source, 'w') as fp:
             print('# This is a test file written by test_import.py', file=fp)
         sys.path.insert(0, os.curdir)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]'
@@ -599,6 +605,7 @@ class PycacheTests(unittest.TestCase):
         pyc_file = make_legacy_pyc(self.source)
         os.remove(self.source)
         unload(TESTFN)
+        importlib.invalidate_caches()
         m = __import__(TESTFN)
         self.assertEqual(m.__file__,
                          os.path.join(os.curdir, os.path.relpath(pyc_file)))
@@ -619,6 +626,7 @@ class PycacheTests(unittest.TestCase):
         pyc_file = make_legacy_pyc(self.source)
         os.remove(self.source)
         unload(TESTFN)
+        importlib.invalidate_caches()
         m = __import__(TESTFN)
         self.assertEqual(m.__cached__,
                          os.path.join(os.curdir, os.path.relpath(pyc_file)))
index 0365cea5ff157e4911980f03380b19ae61a9c840..e75ba1ce90936b405672dbc55123c183024ae78f 100644 (file)
@@ -6,6 +6,7 @@
 import sys
 import os
 import shutil
+import importlib
 import unittest
 
 from test.support import run_unittest, create_empty_file
@@ -212,6 +213,7 @@ class LongReprTest(unittest.TestCase):
         # Remember where we are
         self.here = os.getcwd()
         sys.path.insert(0, self.here)
+        importlib.invalidate_caches()
 
     def tearDown(self):
         actions = []
index 61b40354817bb1354c8540da2b8d81089b881f1b..5c24b99c8c18b344ae7b2e172ab7fbc7fd81542d 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -469,6 +469,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
+  new importlib.invalidate_caches() function.
+
 - Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in
   SimpleXMLRPCServer upon malformed POST request.