Close #19946: use runpy as needed in multiprocessing

author Nick Coghlan <ncoghlan@gmail.com>

Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)

committer Nick Coghlan <ncoghlan@gmail.com>

Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)
author Nick Coghlan <ncoghlan@gmail.com>
Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)
committer Nick Coghlan <ncoghlan@gmail.com>
Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst

index 4cbc1e45c40ad2436ee9475e04c452e3b32d996a..5a4fa862e04fc0593602ceac363fdbfafc92637b 100644 (file)
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -624,13 +624,22 @@ mmap objects can now be weakref'ed.
  multiprocessing
  ---------------
  
-On Unix two new *start methods* have been added for starting processes
-using :mod:`multiprocessing`.  These make the mixing of processes with
-threads more robust.  See :issue:`8713`.
+On Unix, two new *start methods* (``spawn`` and ``forkserver``) have been
+added for starting processes using :mod:`multiprocessing`.  These make
+the mixing of processes with threads more robust, and the ``spawn``
+method matches the semantics that multiprocessing has always used on
+Windows. (Contributed by Richard Oudkerk in :issue:`8713`.)
  
  Also, except when using the old *fork* start method, child processes
  will no longer inherit unneeded handles/file descriptors from their parents.
  
+:mod:`multiprocessing` now relies on :mod:`runpy` (which implements the
+``-m`` switch) to initialise ``__main__`` appropriately in child processes
+when using the ``spawn`` or ``forkserver`` start methods. This resolves some
+edge cases where combining multiprocessing, the ``-m`` command line switch
+and explicit relative imports could cause obscure failures in child
+processes. (Contributed by Nick Coghlan in :issue:`19946`.)
+
  
  os
  --
diff --git a/Lib/multiprocessing/spawn.py b/Lib/multiprocessing/spawn.py

index c3adfc14a82028d8c352e2c2cecfb62ea0a285ba..c8297f3134cb8f901edc8c07b71d727bbecb30f5 100644 (file)
--- a/Lib/multiprocessing/spawn.py
+++ b/Lib/multiprocessing/spawn.py
@@ -11,6 +11,8 @@
  import os
  import pickle
  import sys
+import runpy
+import types
  
  from . import get_start_method, set_start_method
  from . import process
@@ -157,15 +159,19 @@ def get_preparation_data(name):
          start_method=get_start_method(),
          )
  
-    if sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
-        main_path = getattr(sys.modules['__main__'], '__file__', None)
-        if not main_path and sys.argv[0] not in ('', '-c'):
-            main_path = sys.argv[0]
+    # Figure out whether to initialise main in the subprocess as a module
+    # or through direct execution (or to leave it alone entirely)
+    main_module = sys.modules['__main__']
+    main_mod_name = getattr(main_module.__spec__, "name", None)
+    if main_mod_name is not None:
+        d['init_main_from_name'] = main_mod_name
+    elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE):
+        main_path = getattr(main_module, '__file__', None)
          if main_path is not None:
              if (not os.path.isabs(main_path) and
                          process.ORIGINAL_DIR is not None):
                  main_path = os.path.join(process.ORIGINAL_DIR, main_path)
-            d['main_path'] = os.path.normpath(main_path)
+            d['init_main_from_path'] = os.path.normpath(main_path)
  
      return d
  
@@ -206,55 +212,68 @@ def prepare(data):
      if 'start_method' in data:
          set_start_method(data['start_method'])
  
-    if 'main_path' in data:
-        import_main_path(data['main_path'])
+    if 'init_main_from_name' in data:
+        _fixup_main_from_name(data['init_main_from_name'])
+    elif 'init_main_from_path' in data:
+        _fixup_main_from_path(data['init_main_from_path'])
+
+# Multiprocessing module helpers to fix up the main module in
+# spawned subprocesses
+def _fixup_main_from_name(mod_name):
+    # __main__.py files for packages, directories, zip archives, etc, run
+    # their "main only" code unconditionally, so we don't even try to
+    # populate anything in __main__, nor do we make any changes to
+    # __main__ attributes
+    current_main = sys.modules['__main__']
+    if mod_name == "__main__" or mod_name.endswith(".__main__"):
+        return
+
+    # If this process was forked, __main__ may already be populated
+    if getattr(current_main.__spec__, "name", None) == mod_name:
+        return
+
+    # Otherwise, __main__ may contain some non-main code where we need to
+    # support unpickling it properly. We rerun it as __mp_main__ and make
+    # the normal __main__ an alias to that
+    old_main_modules.append(current_main)
+    main_module = types.ModuleType("__mp_main__")
+    main_content = runpy.run_module(mod_name,
+                                    run_name="__mp_main__",
+                                    alter_sys=True)
+    main_module.__dict__.update(main_content)
+    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+
+
+def _fixup_main_from_path(main_path):
+    # If this process was forked, __main__ may already be populated
+    current_main = sys.modules['__main__']
+
+    # Unfortunately, the main ipython launch script historically had no
+    # "if __name__ == '__main__'" guard, so we work around that
+    # by treating it like a __main__.py file
+    # See https://github.com/ipython/ipython/issues/4698
+    main_name = os.path.splitext(os.path.basename(main_path))[0]
+    if main_name == 'ipython':
+        return
+
+    # Otherwise, if __file__ already has the setting we expect,
+    # there's nothing more to do
+    if getattr(current_main, '__file__', None) == main_path:
+        return
+
+    # If the parent process has sent a path through rather than a module
+    # name we assume it is an executable script that may contain
+    # non-main code that needs to be executed
+    old_main_modules.append(current_main)
+    main_module = types.ModuleType("__mp_main__")
+    main_content = runpy.run_path(main_path,
+                                  run_name="__mp_main__")
+    main_module.__dict__.update(main_content)
+    sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
  
  
  def import_main_path(main_path):
      '''
      Set sys.modules['__main__'] to module at main_path
      '''
-    # XXX (ncoghlan): The following code makes several bogus
-    # assumptions regarding the relationship between __file__
-    # and a module's real name. See PEP 302 and issue #10845
-    if getattr(sys.modules['__main__'], '__file__', None) == main_path:
-        return
-
-    main_name = os.path.splitext(os.path.basename(main_path))[0]
-    if main_name == '__init__':
-        main_name = os.path.basename(os.path.dirname(main_path))
-
-    if main_name == '__main__':
-        main_module = sys.modules['__main__']
-        main_module.__file__ = main_path
-    elif main_name != 'ipython':
-        # Main modules not actually called __main__.py may
-        # contain additional code that should still be executed
-        import importlib
-        import types
-
-        if main_path is None:
-            dirs = None
-        elif os.path.basename(main_path).startswith('__init__.py'):
-            dirs = [os.path.dirname(os.path.dirname(main_path))]
-        else:
-            dirs = [os.path.dirname(main_path)]
-
-        assert main_name not in sys.modules, main_name
-        sys.modules.pop('__mp_main__', None)
-        # We should not try to load __main__
-        # since that would execute 'if __name__ == "__main__"'
-        # clauses, potentially causing a psuedo fork bomb.
-        main_module = types.ModuleType(main_name)
-        # XXX Use a target of main_module?
-        spec = importlib.find_spec(main_name, path=dirs)
-        if spec is None:
-            raise ImportError(name=main_name)
-        methods = importlib._bootstrap._SpecMethods(spec)
-        methods.init_module_attrs(main_module)
-        main_module.__name__ = '__mp_main__'
-        code = spec.loader.get_code(main_name)
-        exec(code, main_module.__dict__)
-
-        old_main_modules.append(sys.modules['__main__'])
-        sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module
+    _fixup_main_from_path(main_path)
diff --git a/Lib/test/test_multiprocessing_main_handling.py b/Lib/test/test_multiprocessing_main_handling.py

new file mode 100644 (file)

index 0000000..007fc54
--- /dev/null
+++ b/Lib/test/test_multiprocessing_main_handling.py
@@ -0,0 +1,287 @@
+# tests __main__ module handling in multiprocessing
+
+import importlib
+import importlib.machinery
+import zipimport
+import unittest
+import sys
+import os
+import os.path
+import py_compile
+
+from test import support
+from test.script_helper import (
+    make_pkg, make_script, make_zip_pkg, make_zip_script,
+    assert_python_ok, assert_python_failure, temp_dir,
+    spawn_python, kill_python)
+
+# We look inside the context module to find out which
+# start methods we can check
+from multiprocessing.context import _concrete_contexts
+
+verbose = support.verbose
+
+test_source = """\
+# multiprocessing includes all sorts of shenanigans to make __main__
+# attributes accessible in the subprocess in a pickle compatible way.
+
+# We run the "doesn't work in the interactive interpreter" example from
+# the docs to make sure it *does* work from an executed __main__,
+# regardless of the invocation mechanism
+
+import sys
+import time
+from multiprocessing import Pool, set_start_method
+
+# We use this __main__ defined function in the map call below in order to
+# check that multiprocessing is correctly running the unguarded
+# code in child processes and then making it available as __main__
+def f(x):
+    return x*x
+
+# Check explicit relative imports
+if "check_sibling" in __file__:
+    # We're inside a package and not in a __main__.py file
+    # so make sure explicit relative imports work correctly
+    from . import sibling
+
+if __name__ == '__main__':
+    start_method = sys.argv[1]
+    set_start_method(start_method)
+    p = Pool(5)
+    results = []
+    p.map_async(f, [1, 2, 3], callback=results.extend)
+    deadline = time.time() + 2 # up to 2 s to report the results
+    while not results:
+        time.sleep(0.05)
+        if time.time() > deadline:
+            raise RuntimeError("Timed out waiting for results")
+    results.sort()
+    print(start_method, "->", results)
+"""
+
+test_source_main_skipped_in_children = """\
+# __main__.py files have an implied "if __name__ == '__main__'" so
+# multiprocessing should always skip running them in child processes
+
+# This means we can't use __main__ defined functions in child processes,
+# so we just use "int" as a passthrough operation below
+
+if __name__ != "__main__":
+    raise RuntimeError("Should only be called as __main__!")
+
+import sys
+import time
+from multiprocessing import Pool, set_start_method
+
+start_method = sys.argv[1]
+set_start_method(start_method)
+p = Pool(5)
+results = []
+p.map_async(int, [1, 4, 9], callback=results.extend)
+deadline = time.time() + 2 # up to 2 s to report the results
+while not results:
+    time.sleep(0.05)
+    if time.time() > deadline:
+        raise RuntimeError("Timed out waiting for results")
+results.sort()
+print(start_method, "->", results)
+"""
+
+# These helpers were copied from test_cmd_line_script & tweaked a bit...
+
+def _make_test_script(script_dir, script_basename,
+                      source=test_source, omit_suffix=False):
+    to_return = make_script(script_dir, script_basename,
+                            source, omit_suffix)
+    # Hack to check explicit relative imports
+    if script_basename == "check_sibling":
+        make_script(script_dir, "sibling", "")
+    importlib.invalidate_caches()
+    return to_return
+
+def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
+                       source=test_source, depth=1):
+    to_return = make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename,
+                             source, depth)
+    importlib.invalidate_caches()
+    return to_return
+
+# There's no easy way to pass the script directory in to get
+# -m to work (avoiding that is the whole point of making
+# directories and zipfiles executable!)
+# So we fake it for testing purposes with a custom launch script
+launch_source = """\
+import sys, os.path, runpy
+sys.path.insert(0, %s)
+runpy._run_module_as_main(%r)
+"""
+
+def _make_launch_script(script_dir, script_basename, module_name, path=None):
+    if path is None:
+        path = "os.path.dirname(__file__)"
+    else:
+        path = repr(path)
+    source = launch_source % (path, module_name)
+    to_return = make_script(script_dir, script_basename, source)
+    importlib.invalidate_caches()
+    return to_return
+
+class MultiProcessingCmdLineMixin():
+    maxDiff = None # Show full tracebacks on subprocess failure
+
+    def setupClass(cls):
+        if cls.start_method not in _concrete_contexts:
+            raise unittest.SkipTest("%r start method not available" %
+                                                          cls.start_method)
+
+    def _check_output(self, script_name, exit_code, out, err):
+        if verbose > 1:
+            print("Output from test script %r:" % script_name)
+            print(out)
+        self.assertEqual(exit_code, 0)
+        self.assertEqual(err.decode('utf-8'), '')
+        expected_results = "%s -> [1, 4, 9]" % self.start_method
+        self.assertEqual(out.decode('utf-8').strip(), expected_results)
+
+    def _check_script(self, script_name, *cmd_line_switches):
+        if not __debug__:
+            cmd_line_switches += ('-' + 'O' * sys.flags.optimize,)
+        run_args = cmd_line_switches + (script_name, self.start_method)
+        rc, out, err = assert_python_ok(*run_args, __isolated=False)
+        self._check_output(script_name, rc, out, err)
+
+    def test_basic_script(self):
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, 'script')
+            self._check_script(script_name)
+
+    def test_basic_script_no_suffix(self):
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, 'script',
+                                            omit_suffix=True)
+            self._check_script(script_name)
+
+    def test_ipython_workaround(self):
+        # Some versions of the IPython launch script are missing the
+        # __name__ == "__main__" guard, and multiprocessing has long had
+        # a workaround for that case
+        # See https://github.com/ipython/ipython/issues/4698
+        source = test_source_main_skipped_in_children
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, 'ipython',
+                                            source=source)
+            self._check_script(script_name)
+            script_no_suffix = _make_test_script(script_dir, 'ipython',
+                                                 source=source,
+                                                 omit_suffix=True)
+            self._check_script(script_no_suffix)
+
+    def test_script_compiled(self):
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, 'script')
+            py_compile.compile(script_name, doraise=True)
+            os.remove(script_name)
+            pyc_file = support.make_legacy_pyc(script_name)
+            self._check_script(pyc_file)
+
+    def test_directory(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, '__main__',
+                                            source=source)
+            self._check_script(script_dir)
+
+    def test_directory_compiled(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, '__main__',
+                                            source=source)
+            py_compile.compile(script_name, doraise=True)
+            os.remove(script_name)
+            pyc_file = support.make_legacy_pyc(script_name)
+            self._check_script(script_dir)
+
+    def test_zipfile(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, '__main__',
+                                            source=source)
+            zip_name, run_name = make_zip_script(script_dir, 'test_zip', script_name)
+            self._check_script(zip_name)
+
+    def test_zipfile_compiled(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            script_name = _make_test_script(script_dir, '__main__',
+                                            source=source)
+            compiled_name = py_compile.compile(script_name, doraise=True)
+            zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
+            self._check_script(zip_name)
+
+    def test_module_in_package(self):
+        with temp_dir() as script_dir:
+            pkg_dir = os.path.join(script_dir, 'test_pkg')
+            make_pkg(pkg_dir)
+            script_name = _make_test_script(pkg_dir, 'check_sibling')
+            launch_name = _make_launch_script(script_dir, 'launch',
+                                              'test_pkg.check_sibling')
+            self._check_script(launch_name)
+
+    def test_module_in_package_in_zipfile(self):
+        with temp_dir() as script_dir:
+            zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script')
+            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script', zip_name)
+            self._check_script(launch_name)
+
+    def test_module_in_subpackage_in_zipfile(self):
+        with temp_dir() as script_dir:
+            zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script', depth=2)
+            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.test_pkg.script', zip_name)
+            self._check_script(launch_name)
+
+    def test_package(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            pkg_dir = os.path.join(script_dir, 'test_pkg')
+            make_pkg(pkg_dir)
+            script_name = _make_test_script(pkg_dir, '__main__',
+                                            source=source)
+            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
+            self._check_script(launch_name)
+
+    def test_package_compiled(self):
+        source = self.main_in_children_source
+        with temp_dir() as script_dir:
+            pkg_dir = os.path.join(script_dir, 'test_pkg')
+            make_pkg(pkg_dir)
+            script_name = _make_test_script(pkg_dir, '__main__',
+                                            source=source)
+            compiled_name = py_compile.compile(script_name, doraise=True)
+            os.remove(script_name)
+            pyc_file = support.make_legacy_pyc(script_name)
+            launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg')
+            self._check_script(launch_name)
+
+# Test all supported start methods (setupClass skips as appropriate)
+
+class SpawnCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+    start_method = 'spawn'
+    main_in_children_source = test_source_main_skipped_in_children
+
+class ForkCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+    start_method = 'fork'
+    main_in_children_source = test_source
+
+class ForkServerCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase):
+    start_method = 'forkserver'
+    main_in_children_source = test_source_main_skipped_in_children
+
+def test_main():
+    support.run_unittest(SpawnCmdLineTest,
+                         ForkCmdLineTest,
+                         ForkServerCmdLineTest)
+    support.reap_children()
+
+if __name__ == '__main__':
+    test_main()
diff --git a/Misc/NEWS b/Misc/NEWS

index ee3c79302b909555946acd31240682b061f92605..e8ab0e7c3589b557424bde4ec1d2c647fc628787 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -44,6 +44,12 @@ Core and Builtins
  Library
  -------
  
+- Issue #19946: multiprocessing now uses runpy to initialize __main__ in
+  child processes when necessary, allowing it to correctly handle scripts
+  without suffixes and submodules that use explicit relative imports or
+  otherwise rely on parent modules being correctly imported prior to
+  execution.
+
  - Issue #19921: When Path.mkdir() is called with parents=True, any missing
    parent is created with the default permissions, ignoring the mode argument
    (mimicking the POSIX "mkdir -p" command).
author	Nick Coghlan <ncoghlan@gmail.com>
	Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)
committer	Nick Coghlan <ncoghlan@gmail.com>
	Tue, 17 Dec 2013 12:17:26 +0000 (22:17 +1000)
Doc/whatsnew/3.4.rst		patch \| blob \| history
Lib/multiprocessing/spawn.py		patch \| blob \| history
Lib/test/test_multiprocessing_main_handling.py	[new file with mode: 0644]	patch \| blob
Misc/NEWS		patch \| blob \| history