From 9a76735b1dd427f4b7d6712ca3bb2b8958e689e8 Mon Sep 17 00:00:00 2001 From: Nick Coghlan Date: Tue, 17 Dec 2013 22:17:26 +1000 Subject: [PATCH] Close #19946: use runpy as needed in multiprocessing - handles main files without a suffix - handles main submodules properly - adds test cases for the various kinds of __main__ --- Doc/whatsnew/3.4.rst | 15 +- Lib/multiprocessing/spawn.py | 121 ++++---- .../test_multiprocessing_main_handling.py | 287 ++++++++++++++++++ Misc/NEWS | 6 + 4 files changed, 375 insertions(+), 54 deletions(-) create mode 100644 Lib/test/test_multiprocessing_main_handling.py diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst index 4cbc1e45c4..5a4fa862e0 100644 --- a/Doc/whatsnew/3.4.rst +++ b/Doc/whatsnew/3.4.rst @@ -624,13 +624,22 @@ mmap objects can now be weakref'ed. multiprocessing --------------- -On Unix two new *start methods* have been added for starting processes -using :mod:`multiprocessing`. These make the mixing of processes with -threads more robust. See :issue:`8713`. +On Unix, two new *start methods* (``spawn`` and ``forkserver``) have been +added for starting processes using :mod:`multiprocessing`. These make +the mixing of processes with threads more robust, and the ``spawn`` +method matches the semantics that multiprocessing has always used on +Windows. (Contributed by Richard Oudkerk in :issue:`8713`). Also, except when using the old *fork* start method, child processes will no longer inherit unneeded handles/file descriptors from their parents. +:mod:`multiprocessing` now relies on :mod:`runpy` (which implements the +``-m`` switch) to initialise ``__main__`` appropriately in child processes +when using the ``spawn`` or ``forkserver`` start methods. This resolves some +edge cases where combining multiprocessing, the ``-m`` command line switch +and explicit relative imports could cause obscure failures in child +processes. (Contributed by Nick Coghlan in :issue:`19946`) + os -- diff --git a/Lib/multiprocessing/spawn.py b/Lib/multiprocessing/spawn.py index c3adfc14a8..c8297f3134 100644 --- a/Lib/multiprocessing/spawn.py +++ b/Lib/multiprocessing/spawn.py @@ -11,6 +11,8 @@ import os import pickle import sys +import runpy +import types from . import get_start_method, set_start_method from . import process @@ -157,15 +159,19 @@ def get_preparation_data(name): start_method=get_start_method(), ) - if sys.platform != 'win32' or (not WINEXE and not WINSERVICE): - main_path = getattr(sys.modules['__main__'], '__file__', None) - if not main_path and sys.argv[0] not in ('', '-c'): - main_path = sys.argv[0] + # Figure out whether to initialise main in the subprocess as a module + # or through direct execution (or to leave it alone entirely) + main_module = sys.modules['__main__'] + main_mod_name = getattr(main_module.__spec__, "name", None) + if main_mod_name is not None: + d['init_main_from_name'] = main_mod_name + elif sys.platform != 'win32' or (not WINEXE and not WINSERVICE): + main_path = getattr(main_module, '__file__', None) if main_path is not None: if (not os.path.isabs(main_path) and process.ORIGINAL_DIR is not None): main_path = os.path.join(process.ORIGINAL_DIR, main_path) - d['main_path'] = os.path.normpath(main_path) + d['init_main_from_path'] = os.path.normpath(main_path) return d @@ -206,55 +212,68 @@ def prepare(data): if 'start_method' in data: set_start_method(data['start_method']) - if 'main_path' in data: - import_main_path(data['main_path']) + if 'init_main_from_name' in data: + _fixup_main_from_name(data['init_main_from_name']) + elif 'init_main_from_path' in data: + _fixup_main_from_path(data['init_main_from_path']) + +# Multiprocessing module helpers to fix up the main module in +# spawned subprocesses +def _fixup_main_from_name(mod_name): + # __main__.py files for packages, directories, zip archives, etc, run + # their "main only" code unconditionally, so we don't even try to + # populate anything in __main__, nor do we make any changes to + # __main__ attributes + current_main = sys.modules['__main__'] + if mod_name == "__main__" or mod_name.endswith(".__main__"): + return + + # If this process was forked, __main__ may already be populated + if getattr(current_main.__spec__, "name", None) == mod_name: + return + + # Otherwise, __main__ may contain some non-main code where we need to + # support unpickling it properly. We rerun it as __mp_main__ and make + # the normal __main__ an alias to that + old_main_modules.append(current_main) + main_module = types.ModuleType("__mp_main__") + main_content = runpy.run_module(mod_name, + run_name="__mp_main__", + alter_sys=True) + main_module.__dict__.update(main_content) + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + + +def _fixup_main_from_path(main_path): + # If this process was forked, __main__ may already be populated + current_main = sys.modules['__main__'] + + # Unfortunately, the main ipython launch script historically had no + # "if __name__ == '__main__'" guard, so we work around that + # by treating it like a __main__.py file + # See https://github.com/ipython/ipython/issues/4698 + main_name = os.path.splitext(os.path.basename(main_path))[0] + if main_name == 'ipython': + return + + # Otherwise, if __file__ already has the setting we expect, + # there's nothing more to do + if getattr(current_main, '__file__', None) == main_path: + return + + # If the parent process has sent a path through rather than a module + # name we assume it is an executable script that may contain + # non-main code that needs to be executed + old_main_modules.append(current_main) + main_module = types.ModuleType("__mp_main__") + main_content = runpy.run_path(main_path, + run_name="__mp_main__") + main_module.__dict__.update(main_content) + sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module def import_main_path(main_path): ''' Set sys.modules['__main__'] to module at main_path ''' - # XXX (ncoghlan): The following code makes several bogus - # assumptions regarding the relationship between __file__ - # and a module's real name. See PEP 302 and issue #10845 - if getattr(sys.modules['__main__'], '__file__', None) == main_path: - return - - main_name = os.path.splitext(os.path.basename(main_path))[0] - if main_name == '__init__': - main_name = os.path.basename(os.path.dirname(main_path)) - - if main_name == '__main__': - main_module = sys.modules['__main__'] - main_module.__file__ = main_path - elif main_name != 'ipython': - # Main modules not actually called __main__.py may - # contain additional code that should still be executed - import importlib - import types - - if main_path is None: - dirs = None - elif os.path.basename(main_path).startswith('__init__.py'): - dirs = [os.path.dirname(os.path.dirname(main_path))] - else: - dirs = [os.path.dirname(main_path)] - - assert main_name not in sys.modules, main_name - sys.modules.pop('__mp_main__', None) - # We should not try to load __main__ - # since that would execute 'if __name__ == "__main__"' - # clauses, potentially causing a psuedo fork bomb. - main_module = types.ModuleType(main_name) - # XXX Use a target of main_module? - spec = importlib.find_spec(main_name, path=dirs) - if spec is None: - raise ImportError(name=main_name) - methods = importlib._bootstrap._SpecMethods(spec) - methods.init_module_attrs(main_module) - main_module.__name__ = '__mp_main__' - code = spec.loader.get_code(main_name) - exec(code, main_module.__dict__) - - old_main_modules.append(sys.modules['__main__']) - sys.modules['__main__'] = sys.modules['__mp_main__'] = main_module + _fixup_main_from_path(main_path) diff --git a/Lib/test/test_multiprocessing_main_handling.py b/Lib/test/test_multiprocessing_main_handling.py new file mode 100644 index 0000000000..007fc5479e --- /dev/null +++ b/Lib/test/test_multiprocessing_main_handling.py @@ -0,0 +1,287 @@ +# tests __main__ module handling in multiprocessing + +import importlib +import importlib.machinery +import zipimport +import unittest +import sys +import os +import os.path +import py_compile + +from test import support +from test.script_helper import ( + make_pkg, make_script, make_zip_pkg, make_zip_script, + assert_python_ok, assert_python_failure, temp_dir, + spawn_python, kill_python) + +# We look inside the context module to find out which +# start methods we can check +from multiprocessing.context import _concrete_contexts + +verbose = support.verbose + +test_source = """\ +# multiprocessing includes all sorts of shenanigans to make __main__ +# attributes accessible in the subprocess in a pickle compatible way. + +# We run the "doesn't work in the interactive interpreter" example from +# the docs to make sure it *does* work from an executed __main__, +# regardless of the invocation mechanism + +import sys +import time +from multiprocessing import Pool, set_start_method + +# We use this __main__ defined function in the map call below in order to +# check that multiprocessing in correctly running the unguarded +# code in child processes and then making it available as __main__ +def f(x): + return x*x + +# Check explicit relative imports +if "check_sibling" in __file__: + # We're inside a package and not in a __main__.py file + # so make sure explicit relative imports work correctly + from . import sibling + +if __name__ == '__main__': + start_method = sys.argv[1] + set_start_method(start_method) + p = Pool(5) + results = [] + p.map_async(f, [1, 2, 3], callback=results.extend) + deadline = time.time() + 2 # up to 2 s to report the results + while not results: + time.sleep(0.05) + if time.time() > deadline: + raise RuntimeError("Timed out waiting for results") + results.sort() + print(start_method, "->", results) +""" + +test_source_main_skipped_in_children = """\ +# __main__.py files have an implied "if __name__ == '__main__'" so +# multiprocessing should always skip running them in child processes + +# This means we can't use __main__ defined functions in child processes, +# so we just use "int" as a passthrough operation below + +if __name__ != "__main__": + raise RuntimeError("Should only be called as __main__!") + +import sys +import time +from multiprocessing import Pool, set_start_method + +start_method = sys.argv[1] +set_start_method(start_method) +p = Pool(5) +results = [] +p.map_async(int, [1, 4, 9], callback=results.extend) +deadline = time.time() + 2 # up to 2 s to report the results +while not results: + time.sleep(0.05) + if time.time() > deadline: + raise RuntimeError("Timed out waiting for results") +results.sort() +print(start_method, "->", results) +""" + +# These helpers were copied from test_cmd_line_script & tweaked a bit... + +def _make_test_script(script_dir, script_basename, + source=test_source, omit_suffix=False): + to_return = make_script(script_dir, script_basename, + source, omit_suffix) + # Hack to check explicit relative imports + if script_basename == "check_sibling": + make_script(script_dir, "sibling", "") + importlib.invalidate_caches() + return to_return + +def _make_test_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename, + source=test_source, depth=1): + to_return = make_zip_pkg(zip_dir, zip_basename, pkg_name, script_basename, + source, depth) + importlib.invalidate_caches() + return to_return + +# There's no easy way to pass the script directory in to get +# -m to work (avoiding that is the whole point of making +# directories and zipfiles executable!) +# So we fake it for testing purposes with a custom launch script +launch_source = """\ +import sys, os.path, runpy +sys.path.insert(0, %s) +runpy._run_module_as_main(%r) +""" + +def _make_launch_script(script_dir, script_basename, module_name, path=None): + if path is None: + path = "os.path.dirname(__file__)" + else: + path = repr(path) + source = launch_source % (path, module_name) + to_return = make_script(script_dir, script_basename, source) + importlib.invalidate_caches() + return to_return + +class MultiProcessingCmdLineMixin(): + maxDiff = None # Show full tracebacks on subprocess failure + + def setupClass(cls): + if cls.start_method not in _concrete_contexts: + raise unittest.SkipTest("%r start method not available" % + cls.start_method) + + def _check_output(self, script_name, exit_code, out, err): + if verbose > 1: + print("Output from test script %r:" % script_name) + print(out) + self.assertEqual(exit_code, 0) + self.assertEqual(err.decode('utf-8'), '') + expected_results = "%s -> [1, 4, 9]" % self.start_method + self.assertEqual(out.decode('utf-8').strip(), expected_results) + + def _check_script(self, script_name, *cmd_line_switches): + if not __debug__: + cmd_line_switches += ('-' + 'O' * sys.flags.optimize,) + run_args = cmd_line_switches + (script_name, self.start_method) + rc, out, err = assert_python_ok(*run_args, __isolated=False) + self._check_output(script_name, rc, out, err) + + def test_basic_script(self): + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, 'script') + self._check_script(script_name) + + def test_basic_script_no_suffix(self): + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, 'script', + omit_suffix=True) + self._check_script(script_name) + + def test_ipython_workaround(self): + # Some versions of the IPython launch script are missing the + # __name__ = "__main__" guard, and multiprocessing has long had + # a workaround for that case + # See https://github.com/ipython/ipython/issues/4698 + source = test_source_main_skipped_in_children + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, 'ipython', + source=source) + self._check_script(script_name) + script_no_suffix = _make_test_script(script_dir, 'ipython', + source=source, + omit_suffix=True) + self._check_script(script_no_suffix) + + def test_script_compiled(self): + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, 'script') + py_compile.compile(script_name, doraise=True) + os.remove(script_name) + pyc_file = support.make_legacy_pyc(script_name) + self._check_script(pyc_file) + + def test_directory(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, '__main__', + source=source) + self._check_script(script_dir) + + def test_directory_compiled(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, '__main__', + source=source) + py_compile.compile(script_name, doraise=True) + os.remove(script_name) + pyc_file = support.make_legacy_pyc(script_name) + self._check_script(script_dir) + + def test_zipfile(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, '__main__', + source=source) + zip_name, run_name = make_zip_script(script_dir, 'test_zip', script_name) + self._check_script(zip_name) + + def test_zipfile_compiled(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + script_name = _make_test_script(script_dir, '__main__', + source=source) + compiled_name = py_compile.compile(script_name, doraise=True) + zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name) + self._check_script(zip_name) + + def test_module_in_package(self): + with temp_dir() as script_dir: + pkg_dir = os.path.join(script_dir, 'test_pkg') + make_pkg(pkg_dir) + script_name = _make_test_script(pkg_dir, 'check_sibling') + launch_name = _make_launch_script(script_dir, 'launch', + 'test_pkg.check_sibling') + self._check_script(launch_name) + + def test_module_in_package_in_zipfile(self): + with temp_dir() as script_dir: + zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script') + launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.script', zip_name) + self._check_script(launch_name) + + def test_module_in_subpackage_in_zipfile(self): + with temp_dir() as script_dir: + zip_name, run_name = _make_test_zip_pkg(script_dir, 'test_zip', 'test_pkg', 'script', depth=2) + launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg.test_pkg.script', zip_name) + self._check_script(launch_name) + + def test_package(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + pkg_dir = os.path.join(script_dir, 'test_pkg') + make_pkg(pkg_dir) + script_name = _make_test_script(pkg_dir, '__main__', + source=source) + launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg') + self._check_script(launch_name) + + def test_package_compiled(self): + source = self.main_in_children_source + with temp_dir() as script_dir: + pkg_dir = os.path.join(script_dir, 'test_pkg') + make_pkg(pkg_dir) + script_name = _make_test_script(pkg_dir, '__main__', + source=source) + compiled_name = py_compile.compile(script_name, doraise=True) + os.remove(script_name) + pyc_file = support.make_legacy_pyc(script_name) + launch_name = _make_launch_script(script_dir, 'launch', 'test_pkg') + self._check_script(launch_name) + +# Test all supported start methods (setupClass skips as appropriate) + +class SpawnCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase): + start_method = 'spawn' + main_in_children_source = test_source_main_skipped_in_children + +class ForkCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase): + start_method = 'fork' + main_in_children_source = test_source + +class ForkServerCmdLineTest(MultiProcessingCmdLineMixin, unittest.TestCase): + start_method = 'forkserver' + main_in_children_source = test_source_main_skipped_in_children + +def test_main(): + support.run_unittest(SpawnCmdLineTest, + ForkCmdLineTest, + ForkServerCmdLineTest) + support.reap_children() + +if __name__ == '__main__': + test_main() diff --git a/Misc/NEWS b/Misc/NEWS index ee3c79302b..e8ab0e7c35 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -44,6 +44,12 @@ Core and Builtins Library ------- +- Issue #19946: multiprocessing now uses runpy to initialize __main__ in + child processes when necessary, allowing it to correctly handle scripts + without suffixes and submodules that use explicit relative imports or + otherwise rely on parent modules being correctly imported prior to + execution. + - Issue #19921: When Path.mkdir() is called with parents=True, any missing parent is created with the default permissions, ignoring the mode argument (mimicking the POSIX "mkdir -p" command). -- 2.40.0