From 75e064962ee0e31ec19a8081e9d9cc957baf6415 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Wed, 21 Aug 2019 13:43:06 -0700 Subject: [PATCH] bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287) --- Doc/library/os.path.rst | 10 +- Doc/whatsnew/3.8.rst | 3 + Lib/ntpath.py | 107 ++++++++-- Lib/test/test_ntpath.py | 198 +++++++++++++++++- Lib/test/test_os.py | 5 +- Lib/test/test_shutil.py | 6 +- Lib/unittest/test/test_discovery.py | 6 + .../2019-08-14-13-40-15.bpo-9949.zW45Ks.rst | 1 + 8 files changed, 304 insertions(+), 32 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index a673b81278..640ceecbc7 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -350,11 +350,19 @@ the :mod:`glob` module.) .. function:: realpath(path) Return the canonical path of the specified filename, eliminating any symbolic - links encountered in the path (if they are supported by the operating system). + links encountered in the path (if they are supported by the operating + system). + + .. note:: + When symbolic link cycles occur, the returned path will be one member of + the cycle, but no guarantee is made about which member that will be. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. + .. versionchanged:: 3.8 + Symbolic links and junctions are now resolved on Windows. + .. function:: relpath(path, start=os.curdir) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index e8238251d6..93758e96b5 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level. environment variable and does not use :envvar:`HOME`, which is not normally set for regular user accounts. +:func:`~os.path.realpath` on Windows now resolves reparse points, including +symlinks and directory junctions. + ncurses ------- diff --git a/Lib/ntpath.py b/Lib/ntpath.py index f3cfabf023..ef4999e147 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -519,8 +519,94 @@ else: # use native Windows method on Windows except (OSError, ValueError): return _abspath_fallback(path) -# realpath is a no-op on systems without islink support -realpath = abspath +try: + from nt import _getfinalpathname, readlink as _nt_readlink +except ImportError: + # realpath is a no-op on systems without _getfinalpathname support. + realpath = abspath +else: + def _readlink_deep(path, seen=None): + if seen is None: + seen = set() + + while normcase(path) not in seen: + seen.add(normcase(path)) + try: + path = _nt_readlink(path) + except OSError as ex: + # Stop on file (2) or directory (3) not found, or + # paths that are not reparse points (4390) + if ex.winerror in (2, 3, 4390): + break + raise + except ValueError: + # Stop on reparse points that are not symlinks + break + return path + + def _getfinalpathname_nonstrict(path): + # Fast path to get the final path name. If this succeeds, there + # is no need to go any further. + try: + return _getfinalpathname(path) + except OSError: + pass + + # Allow file (2) or directory (3) not found, invalid syntax (123), + # and symlinks that cannot be followed (1921) + allowed_winerror = 2, 3, 123, 1921 + + # Non-strict algorithm is to find as much of the target directory + # as we can and join the rest. + tail = '' + seen = set() + while path: + try: + path = _readlink_deep(path, seen) + path = _getfinalpathname(path) + return join(path, tail) if tail else path + except OSError as ex: + if ex.winerror not in allowed_winerror: + raise + path, name = split(path) + if path and not name: + return abspath(path + tail) + tail = join(name, tail) if tail else name + return abspath(tail) + + def realpath(path): + path = os.fspath(path) + if isinstance(path, bytes): + prefix = b'\\\\?\\' + unc_prefix = b'\\\\?\\UNC\\' + new_unc_prefix = b'\\\\' + cwd = os.getcwdb() + else: + prefix = '\\\\?\\' + unc_prefix = '\\\\?\\UNC\\' + new_unc_prefix = '\\\\' + cwd = os.getcwd() + had_prefix = path.startswith(prefix) + path = _getfinalpathname_nonstrict(path) + # The path returned by _getfinalpathname will always start with \\?\ - + # strip off that prefix unless it was already provided on the original + # path. + if not had_prefix and path.startswith(prefix): + # For UNC paths, the prefix will actually be \\?\UNC\ + # Handle that case as well. + if path.startswith(unc_prefix): + spath = new_unc_prefix + path[len(unc_prefix):] + else: + spath = path[len(prefix):] + # Ensure that the non-prefixed path resolves to the same path + try: + if _getfinalpathname(spath) == path: + path = spath + except OSError as ex: + pass + return path + + # Win9x family and earlier have no Unicode filename support. supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and sys.getwindowsversion()[3] >= 2) @@ -633,23 +719,6 @@ def commonpath(paths): raise -# determine if two files are in fact the same file -try: - # GetFinalPathNameByHandle is available starting with Windows 6.0. - # Windows XP and non-Windows OS'es will mock _getfinalpathname. - if sys.getwindowsversion()[:2] >= (6, 0): - from nt import _getfinalpathname - else: - raise ImportError -except (AttributeError, ImportError): - # On Windows XP and earlier, two files are the same if their absolute - # pathnames are the same. - # Non-Windows operating systems fake this method with an XP - # approximation. - def _getfinalpathname(f): - return normcase(abspath(f)) - - try: # The genericpath.isdir implementation uses os.stat and checks the mode # attribute to tell whether or not the path is a directory. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 92d85ecbc4..74dc8c378e 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -7,6 +7,7 @@ from test.support import TestFailed, FakePath from test import support, test_genericpath from tempfile import TemporaryFile + try: import nt except ImportError: @@ -14,6 +15,14 @@ except ImportError: # but for those that require it we import here. nt = None +try: + ntpath._getfinalpathname +except AttributeError: + HAVE_GETFINALPATHNAME = False +else: + HAVE_GETFINALPATHNAME = True + + def tester(fn, wantResult): fn = fn.replace("\\", "\\\\") gotResult = eval(fn) @@ -194,6 +203,189 @@ class TestNtpath(unittest.TestCase): tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL') tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z') + def test_realpath_curdir(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('.')", expected) + tester("ntpath.realpath('./.')", expected) + tester("ntpath.realpath('/'.join(['.'] * 100))", expected) + tester("ntpath.realpath('.\\.')", expected) + tester("ntpath.realpath('\\'.join(['.'] * 100))", expected) + + def test_realpath_pardir(self): + expected = ntpath.normpath(os.getcwd()) + tester("ntpath.realpath('..')", ntpath.dirname(expected)) + tester("ntpath.realpath('../..')", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('/'.join(['..'] * 50))", + ntpath.splitdrive(expected)[0] + '\\') + tester("ntpath.realpath('..\\..')", + ntpath.dirname(ntpath.dirname(expected))) + tester("ntpath.realpath('\\'.join(['..'] * 50))", + ntpath.splitdrive(expected)[0] + '\\') + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_basic(self): + ABSTFN = ntpath.abspath(support.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ABSTFN + "1") + self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")), + os.fsencode(ABSTFN)) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_relative(self): + ABSTFN = ntpath.abspath(support.TESTFN) + open(ABSTFN, "wb").close() + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + + os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1")) + self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_broken_symlinks(self): + ABSTFN = ntpath.abspath(support.TESTFN) + os.mkdir(ABSTFN) + self.addCleanup(support.rmtree, ABSTFN) + + with support.change_cwd(ABSTFN): + os.mkdir("subdir") + os.chdir("subdir") + os.symlink(".", "recursive") + os.symlink("..", "parent") + os.chdir("..") + os.symlink(".", "self") + os.symlink("missing", "broken") + os.symlink(r"broken\bar", "broken1") + os.symlink(r"self\self\broken", "broken2") + os.symlink(r"subdir\parent\subdir\parent\broken", "broken3") + os.symlink(ABSTFN + r"\broken", "broken4") + os.symlink(r"recursive\..\broken", "broken5") + + self.assertEqual(ntpath.realpath("broken"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath(r"broken\foo"), + ABSTFN + r"\missing\foo") + self.assertEqual(ntpath.realpath(r"broken1"), + ABSTFN + r"\missing\bar") + self.assertEqual(ntpath.realpath(r"broken1\baz"), + ABSTFN + r"\missing\bar\baz") + self.assertEqual(ntpath.realpath("broken2"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken3"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken4"), + ABSTFN + r"\missing") + self.assertEqual(ntpath.realpath("broken5"), + ABSTFN + r"\missing") + + self.assertEqual(ntpath.realpath(b"broken"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(rb"broken\foo"), + os.fsencode(ABSTFN + r"\missing\foo")) + self.assertEqual(ntpath.realpath(rb"broken1"), + os.fsencode(ABSTFN + r"\missing\bar")) + self.assertEqual(ntpath.realpath(rb"broken1\baz"), + os.fsencode(ABSTFN + r"\missing\bar\baz")) + self.assertEqual(ntpath.realpath(b"broken2"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(rb"broken3"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(b"broken4"), + os.fsencode(ABSTFN + r"\missing")) + self.assertEqual(ntpath.realpath(b"broken5"), + os.fsencode(ABSTFN + r"\missing")) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_symlink_loops(self): + # Bug #930024, return the path unchanged if we get into an infinite + # symlink loop. + ABSTFN = ntpath.abspath(support.TESTFN) + self.addCleanup(support.unlink, ABSTFN) + self.addCleanup(support.unlink, ABSTFN + "1") + self.addCleanup(support.unlink, ABSTFN + "2") + self.addCleanup(support.unlink, ABSTFN + "y") + self.addCleanup(support.unlink, ABSTFN + "c") + self.addCleanup(support.unlink, ABSTFN + "a") + + P = "\\\\?\\" + + os.symlink(ABSTFN, ABSTFN) + self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN) + + # cycles are non-deterministic as to which path is returned, but + # it will always be the fully resolved path of one member of the cycle + os.symlink(ABSTFN + "1", ABSTFN + "2") + os.symlink(ABSTFN + "2", ABSTFN + "1") + expected = (P + ABSTFN + "1", P + ABSTFN + "2") + self.assertIn(ntpath.realpath(ABSTFN + "1"), expected) + self.assertIn(ntpath.realpath(ABSTFN + "2"), expected) + + self.assertIn(ntpath.realpath(ABSTFN + "1\\x"), + (ntpath.join(r, "x") for r in expected)) + self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."), + ntpath.dirname(ABSTFN)) + self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"), + ntpath.dirname(P + ABSTFN) + "\\x") + os.symlink(ABSTFN + "x", ABSTFN + "y") + self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "y"), + P + ABSTFN + "x") + self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\" + + ntpath.basename(ABSTFN) + "1"), + expected) + + os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a") + self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a") + + os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN)) + + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c") + self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c") + + # Test using relative path as well. + self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN) + + @support.skip_unless_symlink + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_symlink_prefix(self): + ABSTFN = ntpath.abspath(support.TESTFN) + self.addCleanup(support.unlink, ABSTFN + "3") + self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.") + self.addCleanup(support.unlink, ABSTFN + "3link") + self.addCleanup(support.unlink, ABSTFN + "3.link") + + with open(ABSTFN + "3", "wb") as f: + f.write(b'0') + os.symlink(ABSTFN + "3", ABSTFN + "3link") + + with open("\\\\?\\" + ABSTFN + "3.", "wb") as f: + f.write(b'1') + os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link") + + self.assertEqual(ntpath.realpath(ABSTFN + "3link"), + ABSTFN + "3") + self.assertEqual(ntpath.realpath(ABSTFN + "3.link"), + "\\\\?\\" + ABSTFN + "3.") + + # Resolved paths should be usable to open target files + with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f: + self.assertEqual(f.read(), b'0') + with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f: + self.assertEqual(f.read(), b'1') + + # When the prefix is included, it is not stripped + self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"), + "\\\\?\\" + ABSTFN + "3") + self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"), + "\\\\?\\" + ABSTFN + "3.") + def test_expandvars(self): with support.EnvironmentVarGuard() as env: env.clear() @@ -288,11 +480,11 @@ class TestNtpath(unittest.TestCase): def test_relpath(self): tester('ntpath.relpath("a")', 'a') - tester('ntpath.relpath(os.path.abspath("a"))', 'a') + tester('ntpath.relpath(ntpath.abspath("a"))', 'a') tester('ntpath.relpath("a/b")', 'a\\b') tester('ntpath.relpath("../a/b")', '..\\a\\b') with support.temp_cwd(support.TESTFN) as cwd_dir: - currentdir = os.path.basename(cwd_dir) + currentdir = ntpath.basename(cwd_dir) tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a') tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b') tester('ntpath.relpath("a", "b/c")', '..\\..\\a') @@ -417,7 +609,7 @@ class TestNtpath(unittest.TestCase): # locations below cannot then refer to mount points # drive, path = ntpath.splitdrive(sys.executable) - with support.change_cwd(os.path.dirname(sys.executable)): + with support.change_cwd(ntpath.dirname(sys.executable)): self.assertFalse(ntpath.ismount(drive.lower())) self.assertFalse(ntpath.ismount(drive.upper())) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index b2cd4cca5f..15fd65b551 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -3358,10 +3358,7 @@ class OSErrorTests(unittest.TestCase): if hasattr(os, "lchmod"): funcs.append((self.filenames, os.lchmod, 0o777)) if hasattr(os, "readlink"): - if sys.platform == "win32": - funcs.append((self.unicode_filenames, os.readlink,)) - else: - funcs.append((self.filenames, os.readlink,)) + funcs.append((self.filenames, os.readlink,)) for filenames, func, *func_args in funcs: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index e209607f22..88dc4d9e19 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -1871,11 +1871,7 @@ class TestMove(unittest.TestCase): dst_link = os.path.join(self.dst_dir, 'quux') shutil.move(dst, dst_link) self.assertTrue(os.path.islink(dst_link)) - # On Windows, os.path.realpath does not follow symlinks (issue #9949) - if os.name == 'nt': - self.assertEqual(os.path.realpath(src), os.readlink(dst_link)) - else: - self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link)) + self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link)) @support.skip_unless_symlink @mock_rename diff --git a/Lib/unittest/test/test_discovery.py b/Lib/unittest/test/test_discovery.py index 204043b493..16e081e1fb 100644 --- a/Lib/unittest/test/test_discovery.py +++ b/Lib/unittest/test/test_discovery.py @@ -723,11 +723,13 @@ class TestDiscovery(unittest.TestCase): original_listdir = os.listdir original_isfile = os.path.isfile original_isdir = os.path.isdir + original_realpath = os.path.realpath def cleanup(): os.listdir = original_listdir os.path.isfile = original_isfile os.path.isdir = original_isdir + os.path.realpath = original_realpath del sys.modules['foo'] if full_path in sys.path: sys.path.remove(full_path) @@ -742,6 +744,10 @@ class TestDiscovery(unittest.TestCase): os.listdir = listdir os.path.isfile = isfile os.path.isdir = isdir + if os.name == 'nt': + # ntpath.realpath may inject path prefixes when failing to + # resolve real files, so we substitute abspath() here instead. + os.path.realpath = os.path.abspath return full_path def test_detect_module_clash(self): diff --git a/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst new file mode 100644 index 0000000000..e42169a927 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst @@ -0,0 +1 @@ +Enable support for following symlinks in :func:`os.realpath`. -- 2.40.0