]> granicus.if.org Git - python/commitdiff
bpo-9949: Enable symlink traversal for ntpath.realpath (GH-15287)
authorSteve Dower <steve.dower@python.org>
Wed, 21 Aug 2019 20:43:06 +0000 (13:43 -0700)
committerGitHub <noreply@github.com>
Wed, 21 Aug 2019 20:43:06 +0000 (13:43 -0700)
Doc/library/os.path.rst
Doc/whatsnew/3.8.rst
Lib/ntpath.py
Lib/test/test_ntpath.py
Lib/test/test_os.py
Lib/test/test_shutil.py
Lib/unittest/test/test_discovery.py
Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst [new file with mode: 0644]

index a673b81278ea7442fcd1bf91d182bbae6b9412de..640ceecbc78f16e75802aa837a85b53fcc9c0cc9 100644 (file)
@@ -350,11 +350,19 @@ the :mod:`glob` module.)
 .. function:: realpath(path)
 
    Return the canonical path of the specified filename, eliminating any symbolic
-   links encountered in the path (if they are supported by the operating system).
+   links encountered in the path (if they are supported by the operating
+   system).
+
+   .. note::
+      When symbolic link cycles occur, the returned path will be one member of
+      the cycle, but no guarantee is made about which member that will be.
 
    .. versionchanged:: 3.6
       Accepts a :term:`path-like object`.
 
+   .. versionchanged:: 3.8
+      Symbolic links and junctions are now resolved on Windows.
+
 
 .. function:: relpath(path, start=os.curdir)
 
index e8238251d6ea256deb41167c651f83c67224439c..93758e96b5736425c6f8002633e3086495064002 100644 (file)
@@ -824,6 +824,9 @@ characters or bytes unrepresentable at the OS level.
 environment variable and does not use :envvar:`HOME`, which is not normally set
 for regular user accounts.
 
+:func:`~os.path.realpath` on Windows now resolves reparse points, including
+symlinks and directory junctions.
+
 
 ncurses
 -------
index f3cfabf0238ef574fc0c14bf36ecf28a4ca0c447..ef4999e1473acb00b80e9c8acc61948232ea2189 100644 (file)
@@ -519,8 +519,94 @@ else:  # use native Windows method on Windows
         except (OSError, ValueError):
             return _abspath_fallback(path)
 
-# realpath is a no-op on systems without islink support
-realpath = abspath
+try:
+    from nt import _getfinalpathname, readlink as _nt_readlink
+except ImportError:
+    # realpath is a no-op on systems without _getfinalpathname support.
+    realpath = abspath
+else:
+    def _readlink_deep(path, seen=None):
+        if seen is None:
+            seen = set()
+
+        while normcase(path) not in seen:
+            seen.add(normcase(path))
+            try:
+                path = _nt_readlink(path)
+            except OSError as ex:
+                # Stop on file (2) or directory (3) not found, or
+                # paths that are not reparse points (4390)
+                if ex.winerror in (2, 3, 4390):
+                    break
+                raise
+            except ValueError:
+                # Stop on reparse points that are not symlinks
+                break
+        return path
+
+    def _getfinalpathname_nonstrict(path):
+        # Fast path to get the final path name. If this succeeds, there
+        # is no need to go any further.
+        try:
+            return _getfinalpathname(path)
+        except OSError:
+            pass
+
+        # Allow file (2) or directory (3) not found, invalid syntax (123),
+        # and symlinks that cannot be followed (1921)
+        allowed_winerror = 2, 3, 123, 1921
+
+        # Non-strict algorithm is to find as much of the target directory
+        # as we can and join the rest.
+        tail = ''
+        seen = set()
+        while path:
+            try:
+                path = _readlink_deep(path, seen)
+                path = _getfinalpathname(path)
+                return join(path, tail) if tail else path
+            except OSError as ex:
+                if ex.winerror not in allowed_winerror:
+                    raise
+                path, name = split(path)
+                if path and not name:
+                    return abspath(path + tail)
+                tail = join(name, tail) if tail else name
+        return abspath(tail)
+
+    def realpath(path):
+        path = os.fspath(path)
+        if isinstance(path, bytes):
+            prefix = b'\\\\?\\'
+            unc_prefix = b'\\\\?\\UNC\\'
+            new_unc_prefix = b'\\\\'
+            cwd = os.getcwdb()
+        else:
+            prefix = '\\\\?\\'
+            unc_prefix = '\\\\?\\UNC\\'
+            new_unc_prefix = '\\\\'
+            cwd = os.getcwd()
+        had_prefix = path.startswith(prefix)
+        path = _getfinalpathname_nonstrict(path)
+        # The path returned by _getfinalpathname will always start with \\?\ -
+        # strip off that prefix unless it was already provided on the original
+        # path.
+        if not had_prefix and path.startswith(prefix):
+            # For UNC paths, the prefix will actually be \\?\UNC\
+            # Handle that case as well.
+            if path.startswith(unc_prefix):
+                spath = new_unc_prefix + path[len(unc_prefix):]
+            else:
+                spath = path[len(prefix):]
+            # Ensure that the non-prefixed path resolves to the same path
+            try:
+                if _getfinalpathname(spath) == path:
+                    path = spath
+            except OSError as ex:
+                pass
+        return path
+
+
 # Win9x family and earlier have no Unicode filename support.
 supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and
                               sys.getwindowsversion()[3] >= 2)
@@ -633,23 +719,6 @@ def commonpath(paths):
         raise
 
 
-# determine if two files are in fact the same file
-try:
-    # GetFinalPathNameByHandle is available starting with Windows 6.0.
-    # Windows XP and non-Windows OS'es will mock _getfinalpathname.
-    if sys.getwindowsversion()[:2] >= (6, 0):
-        from nt import _getfinalpathname
-    else:
-        raise ImportError
-except (AttributeError, ImportError):
-    # On Windows XP and earlier, two files are the same if their absolute
-    # pathnames are the same.
-    # Non-Windows operating systems fake this method with an XP
-    # approximation.
-    def _getfinalpathname(f):
-        return normcase(abspath(f))
-
-
 try:
     # The genericpath.isdir implementation uses os.stat and checks the mode
     # attribute to tell whether or not the path is a directory.
index 92d85ecbc4fcfed4ec0bb0c3aff95f389ba99c07..74dc8c378e2746d657a3baf1e122493a84a7739e 100644 (file)
@@ -7,6 +7,7 @@ from test.support import TestFailed, FakePath
 from test import support, test_genericpath
 from tempfile import TemporaryFile
 
+
 try:
     import nt
 except ImportError:
@@ -14,6 +15,14 @@ except ImportError:
     # but for those that require it we import here.
     nt = None
 
+try:
+    ntpath._getfinalpathname
+except AttributeError:
+    HAVE_GETFINALPATHNAME = False
+else:
+    HAVE_GETFINALPATHNAME = True
+
+
 def tester(fn, wantResult):
     fn = fn.replace("\\", "\\\\")
     gotResult = eval(fn)
@@ -194,6 +203,189 @@ class TestNtpath(unittest.TestCase):
         tester("ntpath.normpath('\\\\.\\NUL')", r'\\.\NUL')
         tester("ntpath.normpath('\\\\?\\D:/XY\\Z')", r'\\?\D:/XY\Z')
 
+    def test_realpath_curdir(self):
+        expected = ntpath.normpath(os.getcwd())
+        tester("ntpath.realpath('.')", expected)
+        tester("ntpath.realpath('./.')", expected)
+        tester("ntpath.realpath('/'.join(['.'] * 100))", expected)
+        tester("ntpath.realpath('.\\.')", expected)
+        tester("ntpath.realpath('\\'.join(['.'] * 100))", expected)
+
+    def test_realpath_pardir(self):
+        expected = ntpath.normpath(os.getcwd())
+        tester("ntpath.realpath('..')", ntpath.dirname(expected))
+        tester("ntpath.realpath('../..')",
+               ntpath.dirname(ntpath.dirname(expected)))
+        tester("ntpath.realpath('/'.join(['..'] * 50))",
+               ntpath.splitdrive(expected)[0] + '\\')
+        tester("ntpath.realpath('..\\..')",
+               ntpath.dirname(ntpath.dirname(expected)))
+        tester("ntpath.realpath('\\'.join(['..'] * 50))",
+               ntpath.splitdrive(expected)[0] + '\\')
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_basic(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        open(ABSTFN, "wb").close()
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+
+        os.symlink(ABSTFN, ABSTFN + "1")
+        self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
+        self.assertEqual(ntpath.realpath(os.fsencode(ABSTFN + "1")),
+                         os.fsencode(ABSTFN))
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_relative(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        open(ABSTFN, "wb").close()
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+
+        os.symlink(ABSTFN, ntpath.relpath(ABSTFN + "1"))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1"), ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_broken_symlinks(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        os.mkdir(ABSTFN)
+        self.addCleanup(support.rmtree, ABSTFN)
+
+        with support.change_cwd(ABSTFN):
+            os.mkdir("subdir")
+            os.chdir("subdir")
+            os.symlink(".", "recursive")
+            os.symlink("..", "parent")
+            os.chdir("..")
+            os.symlink(".", "self")
+            os.symlink("missing", "broken")
+            os.symlink(r"broken\bar", "broken1")
+            os.symlink(r"self\self\broken", "broken2")
+            os.symlink(r"subdir\parent\subdir\parent\broken", "broken3")
+            os.symlink(ABSTFN + r"\broken", "broken4")
+            os.symlink(r"recursive\..\broken", "broken5")
+
+            self.assertEqual(ntpath.realpath("broken"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath(r"broken\foo"),
+                             ABSTFN + r"\missing\foo")
+            self.assertEqual(ntpath.realpath(r"broken1"),
+                             ABSTFN + r"\missing\bar")
+            self.assertEqual(ntpath.realpath(r"broken1\baz"),
+                             ABSTFN + r"\missing\bar\baz")
+            self.assertEqual(ntpath.realpath("broken2"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken3"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken4"),
+                             ABSTFN + r"\missing")
+            self.assertEqual(ntpath.realpath("broken5"),
+                             ABSTFN + r"\missing")
+
+            self.assertEqual(ntpath.realpath(b"broken"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(rb"broken\foo"),
+                             os.fsencode(ABSTFN + r"\missing\foo"))
+            self.assertEqual(ntpath.realpath(rb"broken1"),
+                             os.fsencode(ABSTFN + r"\missing\bar"))
+            self.assertEqual(ntpath.realpath(rb"broken1\baz"),
+                             os.fsencode(ABSTFN + r"\missing\bar\baz"))
+            self.assertEqual(ntpath.realpath(b"broken2"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(rb"broken3"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(b"broken4"),
+                             os.fsencode(ABSTFN + r"\missing"))
+            self.assertEqual(ntpath.realpath(b"broken5"),
+                             os.fsencode(ABSTFN + r"\missing"))
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_symlink_loops(self):
+        # Bug #930024, return the path unchanged if we get into an infinite
+        # symlink loop.
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        self.addCleanup(support.unlink, ABSTFN)
+        self.addCleanup(support.unlink, ABSTFN + "1")
+        self.addCleanup(support.unlink, ABSTFN + "2")
+        self.addCleanup(support.unlink, ABSTFN + "y")
+        self.addCleanup(support.unlink, ABSTFN + "c")
+        self.addCleanup(support.unlink, ABSTFN + "a")
+
+        P = "\\\\?\\"
+
+        os.symlink(ABSTFN, ABSTFN)
+        self.assertEqual(ntpath.realpath(ABSTFN), P + ABSTFN)
+
+        # cycles are non-deterministic as to which path is returned, but
+        # it will always be the fully resolved path of one member of the cycle
+        os.symlink(ABSTFN + "1", ABSTFN + "2")
+        os.symlink(ABSTFN + "2", ABSTFN + "1")
+        expected = (P + ABSTFN + "1", P + ABSTFN + "2")
+        self.assertIn(ntpath.realpath(ABSTFN + "1"), expected)
+        self.assertIn(ntpath.realpath(ABSTFN + "2"), expected)
+
+        self.assertIn(ntpath.realpath(ABSTFN + "1\\x"),
+                      (ntpath.join(r, "x") for r in expected))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\.."),
+                         ntpath.dirname(ABSTFN))
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\x"),
+                         ntpath.dirname(P + ABSTFN) + "\\x")
+        os.symlink(ABSTFN + "x", ABSTFN + "y")
+        self.assertEqual(ntpath.realpath(ABSTFN + "1\\..\\"
+                                         + ntpath.basename(ABSTFN) + "y"),
+                         P + ABSTFN + "x")
+        self.assertIn(ntpath.realpath(ABSTFN + "1\\..\\"
+                                      + ntpath.basename(ABSTFN) + "1"),
+                      expected)
+
+        os.symlink(ntpath.basename(ABSTFN) + "a\\b", ABSTFN + "a")
+        self.assertEqual(ntpath.realpath(ABSTFN + "a"), P + ABSTFN + "a")
+
+        os.symlink("..\\" + ntpath.basename(ntpath.dirname(ABSTFN))
+                   + "\\" + ntpath.basename(ABSTFN) + "c", ABSTFN + "c")
+        self.assertEqual(ntpath.realpath(ABSTFN + "c"), P + ABSTFN + "c")
+
+        # Test using relative path as well.
+        self.assertEqual(ntpath.realpath(ntpath.basename(ABSTFN)), P + ABSTFN)
+
+    @support.skip_unless_symlink
+    @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname')
+    def test_realpath_symlink_prefix(self):
+        ABSTFN = ntpath.abspath(support.TESTFN)
+        self.addCleanup(support.unlink, ABSTFN + "3")
+        self.addCleanup(support.unlink, "\\\\?\\" + ABSTFN + "3.")
+        self.addCleanup(support.unlink, ABSTFN + "3link")
+        self.addCleanup(support.unlink, ABSTFN + "3.link")
+
+        with open(ABSTFN + "3", "wb") as f:
+            f.write(b'0')
+        os.symlink(ABSTFN + "3", ABSTFN + "3link")
+
+        with open("\\\\?\\" + ABSTFN + "3.", "wb") as f:
+            f.write(b'1')
+        os.symlink("\\\\?\\" + ABSTFN + "3.", ABSTFN + "3.link")
+
+        self.assertEqual(ntpath.realpath(ABSTFN + "3link"),
+                         ABSTFN + "3")
+        self.assertEqual(ntpath.realpath(ABSTFN + "3.link"),
+                         "\\\\?\\" + ABSTFN + "3.")
+
+        # Resolved paths should be usable to open target files
+        with open(ntpath.realpath(ABSTFN + "3link"), "rb") as f:
+            self.assertEqual(f.read(), b'0')
+        with open(ntpath.realpath(ABSTFN + "3.link"), "rb") as f:
+            self.assertEqual(f.read(), b'1')
+
+        # When the prefix is included, it is not stripped
+        self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3link"),
+                         "\\\\?\\" + ABSTFN + "3")
+        self.assertEqual(ntpath.realpath("\\\\?\\" + ABSTFN + "3.link"),
+                         "\\\\?\\" + ABSTFN + "3.")
+
     def test_expandvars(self):
         with support.EnvironmentVarGuard() as env:
             env.clear()
@@ -288,11 +480,11 @@ class TestNtpath(unittest.TestCase):
 
     def test_relpath(self):
         tester('ntpath.relpath("a")', 'a')
-        tester('ntpath.relpath(os.path.abspath("a"))', 'a')
+        tester('ntpath.relpath(ntpath.abspath("a"))', 'a')
         tester('ntpath.relpath("a/b")', 'a\\b')
         tester('ntpath.relpath("../a/b")', '..\\a\\b')
         with support.temp_cwd(support.TESTFN) as cwd_dir:
-            currentdir = os.path.basename(cwd_dir)
+            currentdir = ntpath.basename(cwd_dir)
             tester('ntpath.relpath("a", "../b")', '..\\'+currentdir+'\\a')
             tester('ntpath.relpath("a/b", "../c")', '..\\'+currentdir+'\\a\\b')
         tester('ntpath.relpath("a", "b/c")', '..\\..\\a')
@@ -417,7 +609,7 @@ class TestNtpath(unittest.TestCase):
             # locations below cannot then refer to mount points
             #
             drive, path = ntpath.splitdrive(sys.executable)
-            with support.change_cwd(os.path.dirname(sys.executable)):
+            with support.change_cwd(ntpath.dirname(sys.executable)):
                 self.assertFalse(ntpath.ismount(drive.lower()))
                 self.assertFalse(ntpath.ismount(drive.upper()))
 
index b2cd4cca5f21fe351029a326daf2225ffa8bcedb..15fd65b55149e77b37433f5f6148481db1cc00d6 100644 (file)
@@ -3358,10 +3358,7 @@ class OSErrorTests(unittest.TestCase):
         if hasattr(os, "lchmod"):
             funcs.append((self.filenames, os.lchmod, 0o777))
         if hasattr(os, "readlink"):
-            if sys.platform == "win32":
-                funcs.append((self.unicode_filenames, os.readlink,))
-            else:
-                funcs.append((self.filenames, os.readlink,))
+            funcs.append((self.filenames, os.readlink,))
 
 
         for filenames, func, *func_args in funcs:
index e209607f22c1458fb67a644ff6574efa682fcb12..88dc4d9e195ebe26eb71261f981c75fc5c3fe0e2 100644 (file)
@@ -1871,11 +1871,7 @@ class TestMove(unittest.TestCase):
         dst_link = os.path.join(self.dst_dir, 'quux')
         shutil.move(dst, dst_link)
         self.assertTrue(os.path.islink(dst_link))
-        # On Windows, os.path.realpath does not follow symlinks (issue #9949)
-        if os.name == 'nt':
-            self.assertEqual(os.path.realpath(src), os.readlink(dst_link))
-        else:
-            self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link))
+        self.assertEqual(os.path.realpath(src), os.path.realpath(dst_link))
 
     @support.skip_unless_symlink
     @mock_rename
index 204043b493b5d2f661b5cec535d1b71c2f288a58..16e081e1fb76ec15627876cfecbc5ab3907200b9 100644 (file)
@@ -723,11 +723,13 @@ class TestDiscovery(unittest.TestCase):
         original_listdir = os.listdir
         original_isfile = os.path.isfile
         original_isdir = os.path.isdir
+        original_realpath = os.path.realpath
 
         def cleanup():
             os.listdir = original_listdir
             os.path.isfile = original_isfile
             os.path.isdir = original_isdir
+            os.path.realpath = original_realpath
             del sys.modules['foo']
             if full_path in sys.path:
                 sys.path.remove(full_path)
@@ -742,6 +744,10 @@ class TestDiscovery(unittest.TestCase):
         os.listdir = listdir
         os.path.isfile = isfile
         os.path.isdir = isdir
+        if os.name == 'nt':
+            # ntpath.realpath may inject path prefixes when failing to
+            # resolve real files, so we substitute abspath() here instead.
+            os.path.realpath = os.path.abspath
         return full_path
 
     def test_detect_module_clash(self):
diff --git a/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst b/Misc/NEWS.d/next/Windows/2019-08-14-13-40-15.bpo-9949.zW45Ks.rst
new file mode 100644 (file)
index 0000000..e42169a
--- /dev/null
@@ -0,0 +1 @@
+Enable support for following symlinks in :func:`os.realpath`.