granicus.if.org Git - python/commitdiff
Issue #3187: Better support for "undecodable" filenames. Code by Victor
Stinner, with small tweaks by GvR.
author Guido van Rossum <guido@python.org>
Thu, 2 Oct 2008 18:55:37 +0000 (18:55 +0000)
committer Guido van Rossum <guido@python.org>
Thu, 2 Oct 2008 18:55:37 +0000 (18:55 +0000)

Lib/fnmatch.py
Lib/genericpath.py
Lib/glob.py
Lib/io.py
Lib/posixpath.py
Lib/test/test_fnmatch.py
Lib/test/test_posix.py
Lib/test/test_posixpath.py
Lib/test/test_unicode_file.py
Misc/NEWS
Modules/posixmodule.c

index 3bf2463913eb221cef1e4b8220b659b75ba5ffb4..dd012059a7f9f47ea14ced3c495f1f0b357df739 100644 (file)
@@ -37,15 +37,24 @@ def fnmatch(name, pat):
     pat = os.path.normcase(pat)
     return fnmatchcase(name, pat)
 
+def _compile_pattern(pat):
+    regex = _cache.get(pat)
+    if regex is None:
+        if isinstance(pat, bytes):
+            pat_str = str(pat, 'ISO-8859-1')
+            res_str = translate(pat_str)
+            res = bytes(res_str, 'ISO-8859-1')
+        else:
+            res = translate(pat)
+        _cache[pat] = regex = re.compile(res)
+    return regex.match
+
 def filter(names, pat):
     """Return the subset of the list NAMES that match PAT"""
     import os,posixpath
-    result=[]
-    pat=os.path.normcase(pat)
-    if not pat in _cache:
-        res = translate(pat)
-        _cache[pat] = re.compile(res)
-    match=_cache[pat].match
+    result = []
+    pat = os.path.normcase(pat)
+    match = _compile_pattern(pat)
     if os.path is posixpath:
         # normcase on posix is NOP. Optimize it away from the loop.
         for name in names:
@@ -64,10 +73,8 @@ def fnmatchcase(name, pat):
     its arguments.
     """
 
-    if not pat in _cache:
-        res = translate(pat)
-        _cache[pat] = re.compile(res)
-    return _cache[pat].match(name) is not None
+    match = _compile_pattern(pat)
+    return match(name) is not None
 
 def translate(pat):
     """Translate a shell PATTERN to a regular expression.
index 73d7b26b3e27a137a1ff7897bb3143d5b7fd75bd..41ad234829f909ae44e73263884be53de686b016 100644 (file)
@@ -87,6 +87,7 @@ def _splitext(p, sep, altsep, extsep):
 
     Extension is everything from the last dot to the end, ignoring
     leading dots.  Returns "(root, ext)"; ext may be empty."""
+    # NOTE: This code must work for text and bytes strings.
 
     sepIndex = p.rfind(sep)
     if altsep:
@@ -98,8 +99,8 @@ def _splitext(p, sep, altsep, extsep):
         # skip all leading dots
         filenameIndex = sepIndex + 1
         while filenameIndex < dotIndex:
-            if p[filenameIndex] != extsep:
+            if p[filenameIndex:filenameIndex+1] != extsep:
                 return p[:dotIndex], p[dotIndex:]
             filenameIndex += 1
 
-    return p, ''
+    return p, p[:0]
index cd6c3024ca4202fa2b085f94b9cae7683b3b2854..9529f7e2ee5fcd6baa1b7b5cc245492497c69e12 100644 (file)
@@ -27,7 +27,7 @@ def iglob(pathname):
         return
     dirname, basename = os.path.split(pathname)
     if not dirname:
-        for name in glob1(os.curdir, basename):
+        for name in glob1(None, basename):
             yield name
         return
     if has_magic(dirname):
@@ -48,10 +48,10 @@ def iglob(pathname):
 
 def glob1(dirname, pattern):
     if not dirname:
-        dirname = os.curdir
-    if isinstance(pattern, str) and not isinstance(dirname, str):
-        dirname = str(dirname, sys.getfilesystemencoding() or
-                                   sys.getdefaultencoding())
+        if isinstance(pattern, bytes):
+            dirname = bytes(os.curdir, 'ASCII')
+        else:
+            dirname = os.curdir
     try:
         names = os.listdir(dirname)
     except os.error:
@@ -73,6 +73,11 @@ def glob0(dirname, basename):
 
 
 magic_check = re.compile('[*?[]')
+magic_check_bytes = re.compile(b'[*?[]')
 
 def has_magic(s):
-    return magic_check.search(s) is not None
+    if isinstance(s, bytes):
+        match = magic_check_bytes.search(s)
+    else:
+        match = magic_check.search(s)
+    return match is not None
index c1513f5acc155db6e107a412061a26d599a068da..8e65a10e3a4a4f8b3decf07f578e9595ff611f02 100644 (file)
--- a/Lib/io.py
+++ b/Lib/io.py
@@ -82,14 +82,13 @@ class BlockingIOError(IOError):
 def open(file, mode="r", buffering=None, encoding=None, errors=None,
          newline=None, closefd=True):
 
-    r"""Open file and return a stream. If the file cannot be opened, an IOError is
-    raised.
+    r"""Open file and return a stream.  Raise IOError upon failure.
 
-    file is either a string giving the name (and the path if the file
-    isn't in the current working directory) of the file to be opened or an
-    integer file descriptor of the file to be wrapped. (If a file
-    descriptor is given, it is closed when the returned I/O object is
-    closed, unless closefd is set to False.)
+    file is either a text or byte string giving the name (and the path
+    if the file isn't in the current working directory) of the file to
+    be opened or an integer file descriptor of the file to be
+    wrapped. (If a file descriptor is given, it is closed when the
+    returned I/O object is closed, unless closefd is set to False.)
 
     mode is an optional string that specifies the mode in which the file
     is opened. It defaults to 'r' which means open for reading in text
@@ -180,7 +179,7 @@ def open(file, mode="r", buffering=None, encoding=None, errors=None,
     opened in a text mode, and for bytes a BytesIO can be used like a file
     opened in a binary mode.
     """
-    if not isinstance(file, (str, int)):
+    if not isinstance(file, (str, bytes, int)):
         raise TypeError("invalid file: %r" % file)
     if not isinstance(mode, str):
         raise TypeError("invalid mode: %r" % mode)
index 575492f92b3031c837e4d34ed4f5aaa0e89c53aa..745c9204e11a07f70446985b1517541d2cdcba45 100644 (file)
@@ -11,6 +11,7 @@ for manipulation of the pathname component of URLs.
 """
 
 import os
+import sys
 import stat
 import genericpath
 from genericpath import *
@@ -23,7 +24,8 @@ __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
            "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
            "devnull","realpath","supports_unicode_filenames","relpath"]
 
-# strings representing various path-related bits and pieces
+# Strings representing various path-related bits and pieces.
+# These are primarily for export; internally, they are hardcoded.
 curdir = '.'
 pardir = '..'
 extsep = '.'
@@ -33,6 +35,12 @@ defpath = ':/bin:/usr/bin'
 altsep = None
 devnull = '/dev/null'
 
+def _get_sep(path):
+    if isinstance(path, bytes):
+        return b'/'
+    else:
+        return '/'
+
 # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
 # On MS-DOS this may also turn slashes into backslashes; however, other
 # normalizations (such as optimizing '../' away) are not allowed
@@ -40,6 +48,7 @@ devnull = '/dev/null'
 
 def normcase(s):
     """Normalize case of pathname.  Has no effect under Posix"""
+    # TODO: on Mac OS X, this should really return s.lower().
     return s
 
 
@@ -48,7 +57,8 @@ def normcase(s):
 
 def isabs(s):
     """Test whether a path is absolute"""
-    return s.startswith('/')
+    sep = _get_sep(s)
+    return s.startswith(sep)
 
 
 # Join pathnames.
@@ -59,14 +69,15 @@ def join(a, *p):
     """Join two or more pathname components, inserting '/' as needed.
     If any component is an absolute path, all previous path components
     will be discarded."""
+    sep = _get_sep(a)
     path = a
     for b in p:
-        if b.startswith('/'):
+        if b.startswith(sep):
             path = b
-        elif path == '' or path.endswith('/'):
+        elif not path or path.endswith(sep):
             path +=  b
         else:
-            path += '/' + b
+            path += sep + b
     return path
 
 
@@ -78,10 +89,11 @@ def join(a, *p):
 def split(p):
     """Split a pathname.  Returns tuple "(head, tail)" where "tail" is
     everything after the final slash.  Either part may be empty."""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head, tail = p[:i], p[i:]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head, tail
 
 
@@ -91,7 +103,13 @@ def split(p):
 # It is always true that root + ext == p.
 
 def splitext(p):
-    return genericpath._splitext(p, sep, altsep, extsep)
+    if isinstance(p, bytes):
+        sep = b'/'
+        extsep = b'.'
+    else:
+        sep = '/'
+        extsep = '.'
+    return genericpath._splitext(p, sep, None, extsep)
 splitext.__doc__ = genericpath._splitext.__doc__
 
 # Split a pathname into a drive specification and the rest of the
@@ -100,14 +118,15 @@ splitext.__doc__ = genericpath._splitext.__doc__
 def splitdrive(p):
     """Split a pathname into drive and path. On Posix, drive is always
     empty."""
-    return '', p
+    return p[:0], p
 
 
 # Return the tail (basename) part of a path, same as split(path)[1].
 
 def basename(p):
     """Returns the final component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     return p[i:]
 
 
@@ -115,10 +134,11 @@ def basename(p):
 
 def dirname(p):
     """Returns the directory component of a pathname"""
-    i = p.rfind('/') + 1
+    sep = _get_sep(p)
+    i = p.rfind(sep) + 1
     head = p[:i]
-    if head and head != '/'*len(head):
-        head = head.rstrip('/')
+    if head and head != sep*len(head):
+        head = head.rstrip(sep)
     return head
 
 
@@ -179,7 +199,11 @@ def ismount(path):
     """Test whether a path is a mount point"""
     try:
         s1 = os.lstat(path)
-        s2 = os.lstat(join(path, '..'))
+        if isinstance(path, bytes):
+            parent = join(path, b'..')
+        else:
+            parent = join(path, '..')
+        s2 = os.lstat(parent)
     except os.error:
         return False # It doesn't exist -- so not a mount point :-)
     dev1 = s1.st_dev
@@ -205,9 +229,14 @@ def ismount(path):
 def expanduser(path):
     """Expand ~ and ~user constructions.  If user or $HOME is unknown,
     do nothing."""
-    if not path.startswith('~'):
+    if isinstance(path, bytes):
+        tilde = b'~'
+    else:
+        tilde = '~'
+    if not path.startswith(tilde):
         return path
-    i = path.find('/', 1)
+    sep = _get_sep(path)
+    i = path.find(sep, 1)
     if i < 0:
         i = len(path)
     if i == 1:
@@ -218,12 +247,17 @@ def expanduser(path):
             userhome = os.environ['HOME']
     else:
         import pwd
+        name = path[1:i]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         try:
-            pwent = pwd.getpwnam(path[1:i])
+            pwent = pwd.getpwnam(name)
         except KeyError:
             return path
         userhome = pwent.pw_dir
-    userhome = userhome.rstrip('/')
+    if isinstance(path, bytes):
+        userhome = userhome.encode(sys.getfilesystemencoding())
+    userhome = userhome.rstrip(sep)
     return userhome + path[i:]
 
 
@@ -232,28 +266,47 @@ def expanduser(path):
 # Non-existent variables are left unchanged.
 
 _varprog = None
+_varprogb = None
 
 def expandvars(path):
     """Expand shell variables of form $var and ${var}.  Unknown variables
     are left unchanged."""
-    global _varprog
-    if '$' not in path:
-        return path
-    if not _varprog:
-        import re
-        _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+    global _varprog, _varprogb
+    if isinstance(path, bytes):
+        if b'$' not in path:
+            return path
+        if not _varprogb:
+            import re
+            _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprogb.search
+        start = b'{'
+        end = b'}'
+    else:
+        if '$' not in path:
+            return path
+        if not _varprog:
+            import re
+            _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
+        search = _varprog.search
+        start = '{'
+        end = '}'
     i = 0
     while True:
-        m = _varprog.search(path, i)
+        m = search(path, i)
         if not m:
             break
         i, j = m.span(0)
         name = m.group(1)
-        if name.startswith('{') and name.endswith('}'):
+        if name.startswith(start) and name.endswith(end):
             name = name[1:-1]
+        if isinstance(name, bytes):
+            name = str(name, 'ASCII')
         if name in os.environ:
             tail = path[j:]
-            path = path[:i] + os.environ[name]
+            value = os.environ[name]
+            if isinstance(path, bytes):
+                value = value.encode('ASCII')
+            path = path[:i] + value
             i = len(path)
             path += tail
         else:
@@ -267,35 +320,49 @@ def expandvars(path):
 
 def normpath(path):
     """Normalize path, eliminating double slashes, etc."""
-    if path == '':
-        return '.'
-    initial_slashes = path.startswith('/')
+    if isinstance(path, bytes):
+        sep = b'/'
+        empty = b''
+        dot = b'.'
+        dotdot = b'..'
+    else:
+        sep = '/'
+        empty = ''
+        dot = '.'
+        dotdot = '..'
+    if path == empty:
+        return dot
+    initial_slashes = path.startswith(sep)
     # POSIX allows one or two initial slashes, but treats three or more
     # as single slash.
     if (initial_slashes and
-        path.startswith('//') and not path.startswith('///')):
+        path.startswith(sep*2) and not path.startswith(sep*3)):
         initial_slashes = 2
-    comps = path.split('/')
+    comps = path.split(sep)
     new_comps = []
     for comp in comps:
-        if comp in ('', '.'):
+        if comp in (empty, dot):
             continue
-        if (comp != '..' or (not initial_slashes and not new_comps) or
-             (new_comps and new_comps[-1] == '..')):
+        if (comp != dotdot or (not initial_slashes and not new_comps) or
+             (new_comps and new_comps[-1] == dotdot)):
             new_comps.append(comp)
         elif new_comps:
             new_comps.pop()
     comps = new_comps
-    path = '/'.join(comps)
+    path = sep.join(comps)
     if initial_slashes:
-        path = '/'*initial_slashes + path
-    return path or '.'
+        path = sep*initial_slashes + path
+    return path or dot
 
 
 def abspath(path):
     """Return an absolute path."""
     if not isabs(path):
-        path = join(os.getcwd(), path)
+        if isinstance(path, bytes):
+            cwd = os.getcwdb()
+        else:
+            cwd = os.getcwd()
+        path = join(cwd, path)
     return normpath(path)
 
 
@@ -305,10 +372,16 @@ def abspath(path):
 def realpath(filename):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
+    if isinstance(filename, bytes):
+        sep = b'/'
+        empty = b''
+    else:
+        sep = '/'
+        empty = ''
     if isabs(filename):
-        bits = ['/'] + filename.split('/')[1:]
+        bits = [sep] + filename.split(sep)[1:]
     else:
-        bits = [''] + filename.split('/')
+        bits = [empty] + filename.split(sep)
 
     for i in range(2, len(bits)+1):
         component = join(*bits[0:i])
@@ -347,12 +420,24 @@ def _resolve_link(path):
 
 supports_unicode_filenames = False
 
-def relpath(path, start=curdir):
+def relpath(path, start=None):
     """Return a relative version of a path"""
 
     if not path:
         raise ValueError("no path specified")
 
+    if isinstance(path, bytes):
+        curdir = b'.'
+        sep = b'/'
+        pardir = b'..'
+    else:
+        curdir = '.'
+        sep = '/'
+        pardir = '..'
+
+    if start is None:
+        start = curdir
+
     start_list = abspath(start).split(sep)
     path_list = abspath(path).split(sep)
 
index c593704e67492f1bad9385d06e19789e0c58d36c..30ccf6920a9bb92efd48ba329f1b56f1899e45ac 100644 (file)
@@ -37,6 +37,15 @@ class FnmatchTestCase(unittest.TestCase):
         check('a', r'[!\]')
         check('\\', r'[!\]', 0)
 
+    def test_mix_bytes_str(self):
+        self.assertRaises(TypeError, fnmatch, 'test', b'*')
+        self.assertRaises(TypeError, fnmatch, b'test', '*')
+        self.assertRaises(TypeError, fnmatchcase, 'test', b'*')
+        self.assertRaises(TypeError, fnmatchcase, b'test', '*')
+
+    def test_bytes(self):
+        self.check_match(b'test', b'te*')
+        self.check_match(b'test\xff', b'te*\xff')
 
 def test_main():
     support.run_unittest(FnmatchTestCase)
index 59f8c413b0dd9671b43c7b68eb03dde709a5e51c..45decb7a085574e114f19e3c6ac740a8f50e4aab 100644 (file)
@@ -29,7 +29,7 @@ class PosixTester(unittest.TestCase):
     def testNoArgFunctions(self):
         # test posix functions which take no arguments and have
         # no side-effects which we need to cleanup (e.g., fork, wait, abort)
-        NO_ARG_FUNCTIONS = [ "ctermid", "getcwd", "getcwdu", "uname",
+        NO_ARG_FUNCTIONS = [ "ctermid", "getcwd", "getcwdb", "uname",
                              "times", "getloadavg",
                              "getegid", "geteuid", "getgid", "getgroups",
                              "getpid", "getpgrp", "getppid", "getuid",
index 8a7dd72224b916c3e5cbb9be102e5aa7c728015c..3676e9f5ae14ec61752434dd35d70ccb645efe75 100644 (file)
@@ -31,20 +31,34 @@ class PosixPathTest(unittest.TestCase):
     def test_normcase(self):
         # Check that normcase() is idempotent
         p = "FoO/./BaR"
-        p = posixpath.normcase(p)
+        self.assertEqual(p, posixpath.normcase(p))
+
+        p = b"FoO/./BaR"
         self.assertEqual(p, posixpath.normcase(p))
 
         self.assertRaises(TypeError, posixpath.normcase)
 
     def test_join(self):
-        self.assertEqual(posixpath.join("/foo", "bar", "/bar", "baz"), "/bar/baz")
+        self.assertEqual(posixpath.join("/foo", "bar", "/bar", "baz"),
+                         "/bar/baz")
         self.assertEqual(posixpath.join("/foo", "bar", "baz"), "/foo/bar/baz")
-        self.assertEqual(posixpath.join("/foo/", "bar/", "baz/"), "/foo/bar/baz/")
+        self.assertEqual(posixpath.join("/foo/", "bar/", "baz/"),
+                         "/foo/bar/baz/")
+
+        self.assertEqual(posixpath.join(b"/foo", b"bar", b"/bar", b"baz"),
+                         b"/bar/baz")
+        self.assertEqual(posixpath.join(b"/foo", b"bar", b"baz"),
+                         b"/foo/bar/baz")
+        self.assertEqual(posixpath.join(b"/foo/", b"bar/", b"baz/"),
+                         b"/foo/bar/baz/")
 
         self.assertRaises(TypeError, posixpath.join)
+        self.assertRaises(TypeError, posixpath.join, b"bytes", "str")
+        self.assertRaises(TypeError, posixpath.join, "str", b"bytes")
 
     def test_splitdrive(self):
         self.assertEqual(posixpath.splitdrive("/foo/bar"), ("", "/foo/bar"))
+        self.assertEqual(posixpath.splitdrive(b"/foo/bar"), (b"", b"/foo/bar"))
 
         self.assertRaises(TypeError, posixpath.splitdrive)
 
@@ -55,15 +69,41 @@ class PosixPathTest(unittest.TestCase):
         self.assertEqual(posixpath.split("////foo"), ("////", "foo"))
         self.assertEqual(posixpath.split("//foo//bar"), ("//foo", "bar"))
 
+        self.assertEqual(posixpath.split(b"/foo/bar"), (b"/foo", b"bar"))
+        self.assertEqual(posixpath.split(b"/"), (b"/", b""))
+        self.assertEqual(posixpath.split(b"foo"), (b"", b"foo"))
+        self.assertEqual(posixpath.split(b"////foo"), (b"////", b"foo"))
+        self.assertEqual(posixpath.split(b"//foo//bar"), (b"//foo", b"bar"))
+
         self.assertRaises(TypeError, posixpath.split)
 
     def splitextTest(self, path, filename, ext):
         self.assertEqual(posixpath.splitext(path), (filename, ext))
         self.assertEqual(posixpath.splitext("/" + path), ("/" + filename, ext))
-        self.assertEqual(posixpath.splitext("abc/" + path), ("abc/" + filename, ext))
-        self.assertEqual(posixpath.splitext("abc.def/" + path), ("abc.def/" + filename, ext))
-        self.assertEqual(posixpath.splitext("/abc.def/" + path), ("/abc.def/" + filename, ext))
-        self.assertEqual(posixpath.splitext(path + "/"), (filename + ext + "/", ""))
+        self.assertEqual(posixpath.splitext("abc/" + path),
+                         ("abc/" + filename, ext))
+        self.assertEqual(posixpath.splitext("abc.def/" + path),
+                         ("abc.def/" + filename, ext))
+        self.assertEqual(posixpath.splitext("/abc.def/" + path),
+                         ("/abc.def/" + filename, ext))
+        self.assertEqual(posixpath.splitext(path + "/"),
+                         (filename + ext + "/", ""))
+
+        path = bytes(path, "ASCII")
+        filename = bytes(filename, "ASCII")
+        ext = bytes(ext, "ASCII")
+
+        self.assertEqual(posixpath.splitext(path), (filename, ext))
+        self.assertEqual(posixpath.splitext(b"/" + path),
+                         (b"/" + filename, ext))
+        self.assertEqual(posixpath.splitext(b"abc/" + path),
+                         (b"abc/" + filename, ext))
+        self.assertEqual(posixpath.splitext(b"abc.def/" + path),
+                         (b"abc.def/" + filename, ext))
+        self.assertEqual(posixpath.splitext(b"/abc.def/" + path),
+                         (b"/abc.def/" + filename, ext))
+        self.assertEqual(posixpath.splitext(path + b"/"),
+                         (filename + ext + b"/", b""))
 
     def test_splitext(self):
         self.splitextTest("foo.bar", "foo", ".bar")
@@ -87,12 +127,13 @@ class PosixPathTest(unittest.TestCase):
         self.assertIs(posixpath.isabs("/foo/bar"), True)
         self.assertIs(posixpath.isabs("foo/bar"), False)
 
-        self.assertRaises(TypeError, posixpath.isabs)
-
-    def test_splitdrive(self):
-        self.assertEqual(posixpath.splitdrive("/foo/bar"), ("", "/foo/bar"))
+        self.assertIs(posixpath.isabs(b""), False)
+        self.assertIs(posixpath.isabs(b"/"), True)
+        self.assertIs(posixpath.isabs(b"/foo"), True)
+        self.assertIs(posixpath.isabs(b"/foo/bar"), True)
+        self.assertIs(posixpath.isabs(b"foo/bar"), False)
 
-        self.assertRaises(TypeError, posixpath.splitdrive)
+        self.assertRaises(TypeError, posixpath.isabs)
 
     def test_basename(self):
         self.assertEqual(posixpath.basename("/foo/bar"), "bar")
@@ -101,6 +142,12 @@ class PosixPathTest(unittest.TestCase):
         self.assertEqual(posixpath.basename("////foo"), "foo")
         self.assertEqual(posixpath.basename("//foo//bar"), "bar")
 
+        self.assertEqual(posixpath.basename(b"/foo/bar"), b"bar")
+        self.assertEqual(posixpath.basename(b"/"), b"")
+        self.assertEqual(posixpath.basename(b"foo"), b"foo")
+        self.assertEqual(posixpath.basename(b"////foo"), b"foo")
+        self.assertEqual(posixpath.basename(b"//foo//bar"), b"bar")
+
         self.assertRaises(TypeError, posixpath.basename)
 
     def test_dirname(self):
@@ -110,6 +157,12 @@ class PosixPathTest(unittest.TestCase):
         self.assertEqual(posixpath.dirname("////foo"), "////")
         self.assertEqual(posixpath.dirname("//foo//bar"), "//foo")
 
+        self.assertEqual(posixpath.dirname(b"/foo/bar"), b"/foo")
+        self.assertEqual(posixpath.dirname(b"/"), b"/")
+        self.assertEqual(posixpath.dirname(b"foo"), b"")
+        self.assertEqual(posixpath.dirname(b"////foo"), b"////")
+        self.assertEqual(posixpath.dirname(b"//foo//bar"), b"//foo")
+
         self.assertRaises(TypeError, posixpath.dirname)
 
     def test_commonprefix(self):
@@ -130,6 +183,19 @@ class PosixPathTest(unittest.TestCase):
             "/home/swen/spam"
         )
 
+        self.assertEqual(
+            posixpath.commonprefix([b"/home/swenson/spam", b"/home/swen/spam"]),
+            b"/home/swen"
+        )
+        self.assertEqual(
+            posixpath.commonprefix([b"/home/swen/spam", b"/home/swen/eggs"]),
+            b"/home/swen/"
+        )
+        self.assertEqual(
+            posixpath.commonprefix([b"/home/swen/spam", b"/home/swen/spam"]),
+            b"/home/swen/spam"
+        )
+
         testlist = ['', 'abc', 'Xbcd', 'Xb', 'XY', 'abcd', 'aXc', 'abd', 'ab', 'aX', 'abcX']
         for s1 in testlist:
             for s2 in testlist:
@@ -330,20 +396,28 @@ class PosixPathTest(unittest.TestCase):
 
     def test_expanduser(self):
         self.assertEqual(posixpath.expanduser("foo"), "foo")
+        self.assertEqual(posixpath.expanduser(b"foo"), b"foo")
         try:
             import pwd
         except ImportError:
             pass
         else:
             self.assert_(isinstance(posixpath.expanduser("~/"), str))
+            self.assert_(isinstance(posixpath.expanduser(b"~/"), bytes))
             # if home directory == root directory, this test makes no sense
             if posixpath.expanduser("~") != '/':
                 self.assertEqual(
                     posixpath.expanduser("~") + "/",
                     posixpath.expanduser("~/")
                 )
+                self.assertEqual(
+                    posixpath.expanduser(b"~") + b"/",
+                    posixpath.expanduser(b"~/")
+                )
             self.assert_(isinstance(posixpath.expanduser("~root/"), str))
             self.assert_(isinstance(posixpath.expanduser("~foo/"), str))
+            self.assert_(isinstance(posixpath.expanduser(b"~root/"), bytes))
+            self.assert_(isinstance(posixpath.expanduser(b"~foo/"), bytes))
 
         self.assertRaises(TypeError, posixpath.expanduser)
 
@@ -366,6 +440,19 @@ class PosixPathTest(unittest.TestCase):
             self.assertEqual(posixpath.expandvars("${{foo}}"), "baz1}")
             self.assertEqual(posixpath.expandvars("$foo$foo"), "barbar")
             self.assertEqual(posixpath.expandvars("$bar$bar"), "$bar$bar")
+
+            self.assertEqual(posixpath.expandvars(b"foo"), b"foo")
+            self.assertEqual(posixpath.expandvars(b"$foo bar"), b"bar bar")
+            self.assertEqual(posixpath.expandvars(b"${foo}bar"), b"barbar")
+            self.assertEqual(posixpath.expandvars(b"$[foo]bar"), b"$[foo]bar")
+            self.assertEqual(posixpath.expandvars(b"$bar bar"), b"$bar bar")
+            self.assertEqual(posixpath.expandvars(b"$?bar"), b"$?bar")
+            self.assertEqual(posixpath.expandvars(b"${foo}bar"), b"barbar")
+            self.assertEqual(posixpath.expandvars(b"$foo}bar"), b"bar}bar")
+            self.assertEqual(posixpath.expandvars(b"${foo"), b"${foo")
+            self.assertEqual(posixpath.expandvars(b"${{foo}}"), b"baz1}")
+            self.assertEqual(posixpath.expandvars(b"$foo$foo"), b"barbar")
+            self.assertEqual(posixpath.expandvars(b"$bar$bar"), b"$bar$bar")
         finally:
             os.environ.clear()
             os.environ.update(oldenv)
@@ -378,18 +465,31 @@ class PosixPathTest(unittest.TestCase):
         self.assertEqual(posixpath.normpath("//"), "//")
         self.assertEqual(posixpath.normpath("///"), "/")
         self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar")
-        self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"), "/foo/baz")
+        self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"),
+                         "/foo/baz")
         self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar")
 
+        self.assertEqual(posixpath.normpath(b""), b".")
+        self.assertEqual(posixpath.normpath(b"/"), b"/")
+        self.assertEqual(posixpath.normpath(b"//"), b"//")
+        self.assertEqual(posixpath.normpath(b"///"), b"/")
+        self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar")
+        self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"),
+                         b"/foo/baz")
+        self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"),
+                         b"/foo/bar")
+
         self.assertRaises(TypeError, posixpath.normpath)
 
     def test_abspath(self):
         self.assert_("foo" in posixpath.abspath("foo"))
+        self.assert_(b"foo" in posixpath.abspath(b"foo"))
 
         self.assertRaises(TypeError, posixpath.abspath)
 
     def test_realpath(self):
         self.assert_("foo" in realpath("foo"))
+        self.assert_(b"foo" in realpath(b"foo"))
         self.assertRaises(TypeError, posixpath.realpath)
 
     if hasattr(os, "symlink"):
@@ -499,12 +599,34 @@ class PosixPathTest(unittest.TestCase):
             self.assertEqual(posixpath.relpath("a/b"), "a/b")
             self.assertEqual(posixpath.relpath("../a/b"), "../a/b")
             self.assertEqual(posixpath.relpath("a", "../b"), "../"+curdir+"/a")
-            self.assertEqual(posixpath.relpath("a/b", "../c"), "../"+curdir+"/a/b")
+            self.assertEqual(posixpath.relpath("a/b", "../c"),
+                             "../"+curdir+"/a/b")
             self.assertEqual(posixpath.relpath("a", "b/c"), "../../a")
             self.assertEqual(posixpath.relpath("a", "a"), ".")
         finally:
             os.getcwd = real_getcwd
 
+    def test_relpath_bytes(self):
+        (real_getcwdb, os.getcwdb) = (os.getcwdb, lambda: br"/home/user/bar")
+        try:
+            curdir = os.path.split(os.getcwdb())[-1]
+            self.assertRaises(ValueError, posixpath.relpath, b"")
+            self.assertEqual(posixpath.relpath(b"a"), b"a")
+            self.assertEqual(posixpath.relpath(posixpath.abspath(b"a")), b"a")
+            self.assertEqual(posixpath.relpath(b"a/b"), b"a/b")
+            self.assertEqual(posixpath.relpath(b"../a/b"), b"../a/b")
+            self.assertEqual(posixpath.relpath(b"a", b"../b"),
+                             b"../"+curdir+b"/a")
+            self.assertEqual(posixpath.relpath(b"a/b", b"../c"),
+                             b"../"+curdir+b"/a/b")
+            self.assertEqual(posixpath.relpath(b"a", b"b/c"), b"../../a")
+            self.assertEqual(posixpath.relpath(b"a", b"a"), b".")
+
+            self.assertRaises(TypeError, posixpath.relpath, b"bytes", "str")
+            self.assertRaises(TypeError, posixpath.relpath, "str", b"bytes")
+        finally:
+            os.getcwdb = real_getcwdb
+
 def test_main():
     support.run_unittest(PosixPathTest)
 
index cc670db92210a7a598ffc6880d64c2e54fdf0a47..9caadd84047dcd616a8cea33284ccb951e1cef39 100644 (file)
@@ -90,7 +90,7 @@ class TestUnicodeFiles(unittest.TestCase):
         os.unlink(filename1 + ".new")
 
     def _do_directory(self, make_name, chdir_name, encoded):
-        cwd = os.getcwd()
+        cwd = os.getcwdb()
         if os.path.isdir(make_name):
             os.rmdir(make_name)
         os.mkdir(make_name)
@@ -98,10 +98,10 @@ class TestUnicodeFiles(unittest.TestCase):
             os.chdir(chdir_name)
             try:
                 if not encoded:
-                    cwd_result = os.getcwdu()
+                    cwd_result = os.getcwd()
                     name_result = make_name
                 else:
-                    cwd_result = os.getcwd().decode(TESTFN_ENCODING)
+                    cwd_result = os.getcwdb().decode(TESTFN_ENCODING)
                     name_result = make_name.decode(TESTFN_ENCODING)
 
                 cwd_result = unicodedata.normalize("NFD", cwd_result)
index 51206bdda412a67b8f5eee1ff11087e1bfa35f35..9e047675decb05ed446ebeb50950f55d0cb3fd80 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -4,8 +4,11 @@ Python News
 
 (editors: check NEWS.help for information about editing NEWS using ReST.)
 
-What's New in Python 3.0 release candidate 2
-============================================
+What's New in Python 3.0 beta 5
+===============================
+
+[Note: due to the number of unresolved issues we're going back to beta
+ releases for a while.]
 
 *Release date: XX-XXX-2008*
 
@@ -22,6 +25,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #3187: Better support for "undecodable" filenames.  Code by Victor
+  Stinner, with small tweaks by GvR.
+
 - Issue #3965: Allow repeated calls to turtle.Screen, by making it a
   true singleton object.
 
index f6a4d956a9f369eb5a0c8b126d778c9b86579af2..03959f733fc18fb140f1d00018da81708be58a96 100644 (file)
@@ -1968,63 +1968,18 @@ posix_lchown(PyObject *self, PyObject *args)
 
 
 #ifdef HAVE_GETCWD
-PyDoc_STRVAR(posix_getcwd__doc__,
-"getcwd() -> path\n\n\
-Return a string representing the current working directory.");
-
 static PyObject *
-posix_getcwd(PyObject *self, PyObject *noargs)
-{
-       int bufsize_incr = 1024;
-       int bufsize = 0;
-       char *tmpbuf = NULL;
-       char *res = NULL;
-       PyObject *dynamic_return;
-
-       Py_BEGIN_ALLOW_THREADS
-       do {
-               bufsize = bufsize + bufsize_incr;
-               tmpbuf = malloc(bufsize);
-               if (tmpbuf == NULL) {
-                       break;
-               }
-#if defined(PYOS_OS2) && defined(PYCC_GCC)
-               res = _getcwd2(tmpbuf, bufsize);
-#else
-               res = getcwd(tmpbuf, bufsize);
-#endif
-
-               if (res == NULL) {
-                       free(tmpbuf);
-               }
-       } while ((res == NULL) && (errno == ERANGE));
-       Py_END_ALLOW_THREADS
-
-       if (res == NULL)
-               return posix_error();
-
-       dynamic_return = PyUnicode_FromString(tmpbuf);
-       free(tmpbuf);
-
-       return dynamic_return;
-}
-
-PyDoc_STRVAR(posix_getcwdu__doc__,
-"getcwdu() -> path\n\n\
-Return a unicode string representing the current working directory.");
-
-static PyObject *
-posix_getcwdu(PyObject *self, PyObject *noargs)
+posix_getcwd(int use_bytes)
 {
        char buf[1026];
        char *res;
 
 #ifdef Py_WIN_WIDE_FILENAMES
-       DWORD len;
-       if (unicode_file_names()) {
+       if (!use_bytes && unicode_file_names()) {
                wchar_t wbuf[1026];
                wchar_t *wbuf2 = wbuf;
                PyObject *resobj;
+               DWORD len;
                Py_BEGIN_ALLOW_THREADS
                len = GetCurrentDirectoryW(sizeof wbuf/ sizeof wbuf[0], wbuf);
                /* If the buffer is large enough, len does not include the
@@ -2059,8 +2014,30 @@ posix_getcwdu(PyObject *self, PyObject *noargs)
        Py_END_ALLOW_THREADS
        if (res == NULL)
                return posix_error();
+       if (use_bytes)
+               return PyBytes_FromStringAndSize(buf, strlen(buf));
        return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
 }
+
+PyDoc_STRVAR(posix_getcwd__doc__,
+"getcwd() -> path\n\n\
+Return a unicode string representing the current working directory.");
+
+static PyObject *
+posix_getcwd_unicode(PyObject *self)
+{
+    return posix_getcwd(0);
+}
+
+PyDoc_STRVAR(posix_getcwdb__doc__,
+"getcwdb() -> path\n\n\
+Return a bytes string representing the current working directory.");
+
+static PyObject *
+posix_getcwd_bytes(PyObject *self)
+{
+    return posix_getcwd(1);
+}
 #endif
 
 
@@ -2378,9 +2355,12 @@ posix_listdir(PyObject *self, PyObject *args)
                                v = w;
                        }
                        else {
-                               /* fall back to the original byte string, as
-                                  discussed in patch #683592 */
+                               /* Ignore undecodable filenames, as discussed
+                                * in issue 3187. To include these, call
+                                * listdir() with a bytes path. */
                                PyErr_Clear();
+                               Py_DECREF(v);
+                               continue;
                        }
                }
                if (PyList_Append(d, v) != 0) {
@@ -4477,9 +4457,7 @@ posix_readlink(PyObject *self, PyObject *args)
                        v = w;
                }
                else {
-                       /* fall back to the original byte string, as
-                          discussed in patch #683592 */
-                       PyErr_Clear();
+                       v = NULL;
                }
        }
        return v;
@@ -6810,8 +6788,10 @@ static PyMethodDef posix_methods[] = {
        {"ctermid",     posix_ctermid, METH_NOARGS, posix_ctermid__doc__},
 #endif
 #ifdef HAVE_GETCWD
-       {"getcwd",      posix_getcwd, METH_NOARGS, posix_getcwd__doc__},
-       {"getcwdu",     posix_getcwdu, METH_NOARGS, posix_getcwdu__doc__},
+       {"getcwd",      (PyCFunction)posix_getcwd_unicode,
+       METH_NOARGS, posix_getcwd__doc__},
+       {"getcwdb",     (PyCFunction)posix_getcwd_bytes,
+       METH_NOARGS, posix_getcwdb__doc__},
 #endif
 #ifdef HAVE_LINK
        {"link",        posix_link, METH_VARARGS, posix_link__doc__},