Issue #16104: Allow compileall to do parallel bytecode compilation.
author    Brett Cannon <brett@python.org>
          Fri, 12 Sep 2014 14:39:48 +0000 (10:39 -0400)
committer Brett Cannon <brett@python.org>
          Fri, 12 Sep 2014 14:39:48 +0000 (10:39 -0400)
Both compileall.compile_dir() and the CLI for compileall now allow for
specifying how many workers to use (or 0 to use all CPUs).

Thanks to Claudiu Popa for the patch.
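For context, a minimal sketch of the new usage (the directory name is
hypothetical):

    import compileall

    # Serial compilation, the default (workers=1).
    compileall.compile_dir('myproject')

    # Five worker processes; workers=0 means "use os.cpu_count() workers".
    compileall.compile_dir('myproject', workers=5, quiet=True)

The equivalent new command-line flag is -j:

    python -m compileall myproject -j 0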

Doc/library/compileall.rst
Doc/whatsnew/3.5.rst
Lib/compileall.py
Lib/test/test_compileall.py

index 104f33a97368a4de17e5ef37544813bd050b9b22..b11d175459e2ba80427e8c00248399335dd07935 100644 (file)
@@ -73,12 +73,18 @@ compile Python sources.
    :program:`python -m compileall <directory> -r 0` is equivalent to
    :program:`python -m compileall <directory> -l`.
 
+.. cmdoption:: -j N
+
+   Use *N* workers to compile the files within the given directory.
+   If ``0`` is given, the result of :func:`os.cpu_count()`
+   will be used.
 
 .. versionchanged:: 3.2
    Added the ``-i``, ``-b`` and ``-h`` options.
 
 .. versionchanged:: 3.5
-   Added the ``-r`` option.
+   Added the ``-j`` and ``-r`` options.
+
 
 There is no command-line option to control the optimization level used by the
 :func:`compile` function, because the Python interpreter itself already
@@ -87,7 +93,7 @@ provides the option: :program:`python -O -m compileall`.
 Public functions
 ----------------
 
-.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1)
+.. function:: compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1, workers=1)
 
    Recursively descend the directory tree named by *dir*, compiling all :file:`.py`
    files along the way.
@@ -120,9 +126,18 @@ Public functions
    *optimize* specifies the optimization level for the compiler.  It is passed to
    the built-in :func:`compile` function.
 
+   The argument *workers* specifies how many workers are used to
+   compile files in parallel. The default is to not use multiple workers.
+   If the platform can't use multiple workers and the *workers* argument is
+   given, then a :exc:`NotImplementedError` will be raised.
+   If *workers* is less than ``0``, a :exc:`ValueError` will be raised.
+
    .. versionchanged:: 3.2
       Added the *legacy* and *optimize* parameter.
 
+   .. versionchanged:: 3.5
+      Added the *workers* parameter.
+
 
 .. function:: compile_file(fullname, ddir=None, force=False, rx=None, quiet=False, legacy=False, optimize=-1)
 
index d72e7f3f6bb7ddd326fcebb8ef3a755a6de81d09..2e183a9a8a8b5c0cad704e17f063f18e68bf799c 100644 (file)
@@ -134,6 +134,13 @@ New Modules
 Improved Modules
 ================
 
+compileall
+----------
+
+* :func:`compileall.compile_dir` and :mod:`compileall`'s command-line interface
+  can now do parallel bytecode compilation.
+  (Contributed by Claudiu Popa in :issue:`16104`.)
+
 doctest
 -------
 
index 513d899e342bb7465383f9f53bfa00d88f419e4f..f1c9d27ad6ff6a774a3b478a12be6b22c8404e8b 100644 (file)
@@ -16,23 +16,15 @@ import importlib.util
 import py_compile
 import struct
 
-__all__ = ["compile_dir","compile_file","compile_path"]
+try:
+    from concurrent.futures import ProcessPoolExecutor
+except ImportError:
+    ProcessPoolExecutor = None
+from functools import partial
 
-def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
-                quiet=False, legacy=False, optimize=-1):
-    """Byte-compile all modules in the given directory tree.
-
-    Arguments (only dir is required):
+__all__ = ["compile_dir","compile_file","compile_path"]
 
-    dir:       the directory to byte-compile
-    maxlevels: maximum recursion level (default 10)
-    ddir:      the directory that will be prepended to the path to the
-               file as it is compiled into each byte-code file.
-    force:     if True, force compilation, even if timestamps are up-to-date
-    quiet:     if True, be quiet during compilation
-    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
-    optimize:  optimization level or -1 for level of the interpreter
-    """
+def _walk_dir(dir, ddir=None, maxlevels=10, quiet=False):
     if not quiet:
         print('Listing {!r}...'.format(dir))
     try:
@@ -41,7 +33,6 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
         print("Can't list {!r}".format(dir))
         names = []
     names.sort()
-    success = 1
     for name in names:
         if name == '__pycache__':
             continue
@@ -51,13 +42,50 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
         else:
             dfile = None
         if not os.path.isdir(fullname):
-            if not compile_file(fullname, ddir, force, rx, quiet,
-                                legacy, optimize):
-                success = 0
+            yield fullname
         elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
               os.path.isdir(fullname) and not os.path.islink(fullname)):
-            if not compile_dir(fullname, maxlevels - 1, dfile, force, rx,
-                               quiet, legacy, optimize):
+            yield from _walk_dir(fullname, ddir=dfile,
+                                 maxlevels=maxlevels - 1, quiet=quiet)
+
+def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
+                quiet=False, legacy=False, optimize=-1, workers=1):
+    """Byte-compile all modules in the given directory tree.
+
+    Arguments (only dir is required):
+
+    dir:       the directory to byte-compile
+    maxlevels: maximum recursion level (default 10)
+    ddir:      the directory that will be prepended to the path to the
+               file as it is compiled into each byte-code file.
+    force:     if True, force compilation, even if timestamps are up-to-date
+    quiet:     if True, be quiet during compilation
+    legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
+    optimize:  optimization level or -1 for level of the interpreter
+    workers:   maximum number of parallel workers
+    """
+    files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
+                      ddir=ddir)
+    success = 1
+    if workers is not None and workers != 1:
+        if workers < 0:
+            raise ValueError('workers must be greater than or equal to 0')
+        if ProcessPoolExecutor is None:
+            raise NotImplementedError('multiprocessing support not available')
+
+        workers = workers or None
+        with ProcessPoolExecutor(max_workers=workers) as executor:
+            results = executor.map(partial(compile_file,
+                                           ddir=ddir, force=force,
+                                           rx=rx, quiet=quiet,
+                                           legacy=legacy,
+                                           optimize=optimize),
+                                   files)
+            success = min(results, default=1)
+    else:
+        for file in files:
+            if not compile_file(file, ddir, force, rx, quiet,
+                                legacy, optimize):
                 success = 0
     return success
 
@@ -196,8 +224,10 @@ def main():
                         help=('zero or more file and directory names '
                               'to compile; if no arguments given, defaults '
                               'to the equivalent of -l sys.path'))
-    args = parser.parse_args()
+    parser.add_argument('-j', '--workers', default=1,
+                        type=int, help='Run compileall concurrently')
 
+    args = parser.parse_args()
     compile_dests = args.compile_dest
 
     if (args.ddir and (len(compile_dests) != 1
@@ -223,6 +253,9 @@ def main():
             print("Error reading file list {}".format(args.flist))
             return False
 
+    if args.workers is not None:
+        args.workers = args.workers or None
+
     success = True
     try:
         if compile_dests:
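The ``args.workers = args.workers or None`` normalization above is what maps
``-j 0`` to None, the value ProcessPoolExecutor interprets as "use
os.cpu_count() workers". A toy illustration:

    for requested in (0, 1, 4):
        print(requested, '->', requested or None)
    # 0 -> None (auto-detect CPUs), 1 -> 1, 4 -> 4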
@@ -234,7 +267,7 @@ def main():
                 else:
                     if not compile_dir(dest, maxlevels, args.ddir,
                                        args.force, args.rx, args.quiet,
-                                       args.legacy):
+                                       args.legacy, workers=args.workers):
                         success = False
             return success
         else:
index ba1765e6f8734fac53491132a16816c6faacf383..877d6f27f56260ead402a7f8a4cb31c62c1e9222 100644 (file)
@@ -10,6 +10,13 @@ import time
 import unittest
 import io
 
+from unittest import mock, skipUnless
+try:
+    from concurrent.futures import ProcessPoolExecutor
+    _have_multiprocessing = True
+except ImportError:
+    _have_multiprocessing = False
+
 from test import support, script_helper
 
 class CompileallTests(unittest.TestCase):
@@ -106,6 +113,33 @@ class CompileallTests(unittest.TestCase):
                                                    debug_override=not optimize)
         self.assertTrue(os.path.isfile(cached3))
 
+    @mock.patch('compileall.ProcessPoolExecutor')
+    def test_compile_pool_called(self, pool_mock):
+        compileall.compile_dir(self.directory, quiet=True, workers=5)
+        self.assertTrue(pool_mock.called)
+
+    def test_compile_workers_non_positive(self):
+        with self.assertRaisesRegex(ValueError,
+                                    "workers must be greater or equal to 0"):
+            compileall.compile_dir(self.directory, workers=-1)
+
+    @mock.patch('compileall.ProcessPoolExecutor')
+    def test_compile_workers_cpu_count(self, pool_mock):
+        compileall.compile_dir(self.directory, quiet=True, workers=0)
+        self.assertEqual(pool_mock.call_args[1]['max_workers'], None)
+
+    @mock.patch('compileall.ProcessPoolExecutor')
+    @mock.patch('compileall.compile_file')
+    def test_compile_one_worker(self, compile_file_mock, pool_mock):
+        compileall.compile_dir(self.directory, quiet=True)
+        self.assertFalse(pool_mock.called)
+        self.assertTrue(compile_file_mock.called)
+
+    @mock.patch('compileall.ProcessPoolExecutor', new=None)
+    def test_compile_missing_multiprocessing(self):
+        with self.assertRaisesRegex(NotImplementedError,
+                                    "multiprocessing support not available"):
+            compileall.compile_dir(self.directory, quiet=True, workers=5)
 
 class EncodingTest(unittest.TestCase):
     """Issue 6716: compileall should escape source code when printing errors
@@ -413,6 +447,29 @@ class CommandLineTests(unittest.TestCase):
         out = self.assertRunOK('badfilename')
         self.assertRegex(out, b"Can't list 'badfilename'")
 
+    @skipUnless(_have_multiprocessing, "requires multiprocessing")
+    def test_workers(self):
+        bar2fn = script_helper.make_script(self.directory, 'bar2', '')
+        files = []
+        for suffix in range(5):
+            pkgdir = os.path.join(self.directory, 'foo{}'.format(suffix))
+            os.mkdir(pkgdir)
+            fn = script_helper.make_script(pkgdir, '__init__', '')
+            files.append(script_helper.make_script(pkgdir, 'bar2', ''))
+
+        self.assertRunOK(self.directory, '-j', '0')
+        self.assertCompiled(bar2fn)
+        for file in files:
+            self.assertCompiled(file)
+
+    @mock.patch('compileall.compile_dir')
+    def test_workers_available_cores(self, compile_dir):
+        with mock.patch("sys.argv",
+                        new=[sys.executable, self.directory, "-j0"]):
+            compileall.main()
+            self.assertTrue(compile_dir.called)
+            self.assertEqual(compile_dir.call_args[-1]['workers'], None)
+
 
 if __name__ == "__main__":
     unittest.main()