]> granicus.if.org Git - python/commitdiff
Issue #5915: Implement PEP 383, Non-decodable Bytes in
authorMartin v. Löwis <martin@v.loewis.de>
Tue, 5 May 2009 04:43:17 +0000 (04:43 +0000)
committerMartin v. Löwis <martin@v.loewis.de>
Tue, 5 May 2009 04:43:17 +0000 (04:43 +0000)
System Character Interfaces.

15 files changed:
Doc/library/codecs.rst
Doc/library/os.rst
Include/unicodeobject.h
Lib/test/test_codecs.py
Lib/test/test_os.py
Misc/NEWS
Modules/_io/fileio.c
Modules/posixmodule.c
Modules/python.c
Objects/unicodeobject.c
Python/codecs.c
Python/pythonrun.c
configure
configure.in
pyconfig.h.in

index ab578ea2817ede0e67ec3e9f7944cb89105b636f..3f1a5fec39ee8c2cac1f45ccca891398859fd3ab 100644 (file)
@@ -322,6 +322,8 @@ and implemented by all standard Python codecs:
 | ``'backslashreplace'``  | Replace with backslashed escape sequences     |
 |                         | (only for encoding).                          |
 +-------------------------+-----------------------------------------------+
+| ``'utf8b'``             | Replace byte with surrogate U+DCxx.           |
++-------------------------+-----------------------------------------------+
 
 In addition, the following error handlers are specific to a single codec:
 
@@ -333,7 +335,7 @@ In addition, the following error handlers are specific to a single codec:
 +------------------+---------+--------------------------------------------+
 
 .. versionadded:: 3.1
-   The ``'surrogates'`` error handler.
+   The ``'utf8b'`` and ``'surrogates'`` error handlers.
 
 The set of allowed values can be extended via :meth:`register_error`.
 
index c686baf660df6b21bfa99986b3d9a5273505b44d..83f5ee9dc0e8d02727c0f93aa17e4b0dd721b9a4 100644 (file)
@@ -51,6 +51,30 @@ the :mod:`os` module, but using them is of course a threat to portability!
    ``'ce'``, ``'java'``.
 
 
+.. _os-filenames:
+
+File Names, Command Line Arguments, and Environment Variables
+-------------------------------------------------------------
+
+In Python, file names, command line arguments, and environment
+variables are represented using the string type. On some systems,
+converting these strings to and from bytes is necessary before passing
+them to the operating system. Python uses the file system encoding to
+perform this conversion (see :func:`sys.getfilesystemencoding`).
+
+.. versionchanged:: 3.1
+   On some systems, conversion using the file system encoding may
+   fail. In this case, Python uses the ``utf8b`` encoding error
+   handler, which means that undecodable bytes are replaced by a
+   Unicode character U+DCxx on decoding, and these are again
+   translated to the original byte on encoding.
+
+
+The file system encoding must guarantee to successfully decode all
+bytes below 128. If the file system encoding fails to provide this
+guarantee, API functions may raise UnicodeErrors.
+
+
 .. _os-procinfo:
 
 Process Parameters
@@ -688,12 +712,8 @@ Files and Directories
 
 .. function:: getcwd()
 
-   Return a string representing the current working directory.  On Unix
-   platforms, this function may raise :exc:`UnicodeDecodeError` if the name of
-   the current directory is not decodable in the file system encoding.  Use
-   :func:`getcwdb` if you need the call to never fail. Availability: Unix,
-   Windows.
-
+   Return a string representing the current working directory.
+   Availability: Unix, Windows.
 
 .. function:: getcwdb()
 
@@ -800,10 +820,8 @@ Files and Directories
    entries ``'.'`` and ``'..'`` even if they are present in the directory.
    Availability: Unix, Windows.
 
-   This function can be called with a bytes or string argument.  In the bytes
-   case, all filenames will be listed as returned by the underlying API.  In the
-   string case, filenames will be decoded using the file system encoding, and
-   skipped if a decoding error occurs.
+   This function can be called with a bytes or string argument, and returns
+   filenames of the same datatype.
 
 
 .. function:: lstat(path)
index 9c1187313bbda81cf8d86ada458e8832cf50d6e0..08b518a8d757555e5c89423c957f1d0c2e639e16 100644 (file)
@@ -198,6 +198,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
@@ -296,6 +297,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
+# define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
@@ -693,25 +695,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
     PyObject *unicode,
     const char *errors);
 
-/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
-
-   If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
-   UTF-16, UTF-32, Latin-1 or MBCS), otherwise fallback to UTF-8 and replace
-   invalid characters with '?'.
-
-   The function is intended to be used for paths and file names only
-   during bootstrapping process where the codecs are not set up.
-*/
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
-    const char *s               /* encoded string */
-    );
-
-PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
-    const char *s,               /* encoded string */
-    Py_ssize_t size              /* size */
-    );
-
 /* Returns a pointer to the default encoding (normally, UTF-8) of the
    Unicode object unicode and the size of the encoded representation
    in bytes stored in *size.
@@ -1252,6 +1235,33 @@ PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
     const char *errors         /* error handling */
     );
 
+/* --- File system encoding ---------------------------------------------- */
+
+/* ParseTuple converter which converts a Unicode object into the file
+   system encoding, using the PEP 383 error handler; bytes objects are
+   output as-is. */
+
+PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
+
+/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
+
+   If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
+   UTF-16, UTF-32, Latin-1 or MBCS), that codec is used; otherwise fall back
+   to UTF-8 and replace invalid characters with '?'.
+
+   The function is intended to be used for paths and file names only
+   during bootstrapping process where the codecs are not set up.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
+    const char *s               /* encoded string */
+    );
+
+PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
+    const char *s,               /* encoded string */
+    Py_ssize_t size              /* size */
+    );
+
 /* --- Methods & Slots ----------------------------------------------------
 
    These are capable of handling Unicode objects and strings on input
index 6706507335e9289dd4209629f676adf6742be811..5a3834d49592f79d43c266e2f7c59aec699cdf34 100644 (file)
@@ -1516,6 +1516,34 @@ class TypesTest(unittest.TestCase):
         self.assertEquals(codecs.raw_unicode_escape_decode(r"\u1234"), ("\u1234", 6))
         self.assertEquals(codecs.raw_unicode_escape_decode(br"\u1234"), ("\u1234", 6))
 
+class Utf8bTest(unittest.TestCase):
+
+    def test_utf8(self):
+        # Bad byte
+        self.assertEqual(b"foo\x80bar".decode("utf-8", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("utf-8", "utf8b"),
+                         b"foo\x80bar")
+        # bad-utf-8 encoded surrogate
+        self.assertEqual(b"\xed\xb0\x80".decode("utf-8", "utf8b"),
+                         "\udced\udcb0\udc80")
+        self.assertEqual("\udced\udcb0\udc80".encode("utf-8", "utf8b"),
+                         b"\xed\xb0\x80")
+
+    def test_ascii(self):
+        # bad byte
+        self.assertEqual(b"foo\x80bar".decode("ascii", "utf8b"),
+                         "foo\udc80bar")
+        self.assertEqual("foo\udc80bar".encode("ascii", "utf8b"),
+                         b"foo\x80bar")
+
+    def test_charmap(self):
+        # bad byte: \xa5 is unmapped in iso-8859-3
+        self.assertEqual(b"foo\xa5bar".decode("iso-8859-3", "utf8b"),
+                         "foo\udca5bar")
+        self.assertEqual("foo\udca5bar".encode("iso-8859-3", "utf8b"),
+                         b"foo\xa5bar")
+
 
 def test_main():
     support.run_unittest(
@@ -1543,6 +1571,7 @@ def test_main():
         CharmapTest,
         WithStmtTest,
         TypesTest,
+        Utf8bTest,
     )
 
 
index 91e04328d576e3bffa4d4874f3c5b0052fb32086..a380505da635d65ef815854177ca2fde945b27c2 100644 (file)
@@ -7,6 +7,7 @@ import errno
 import unittest
 import warnings
 import sys
+import shutil
 from test import support
 
 # Tests creating TESTFN
@@ -698,9 +699,44 @@ if sys.platform != 'win32':
                     self.assertRaises(os.error, os.setregid, 0, 0)
                 self.assertRaises(OverflowError, os.setregid, 1<<32, 0)
                 self.assertRaises(OverflowError, os.setregid, 0, 1<<32)
+
+    class Pep383Tests(unittest.TestCase):
+        filenames = [b'foo\xf6bar', 'foo\xf6bar'.encode("utf-8")]
+
+        def setUp(self):
+            self.fsencoding = sys.getfilesystemencoding()
+            sys.setfilesystemencoding("utf-8")
+            self.dir = support.TESTFN
+            self.bdir = self.dir.encode("utf-8", "utf8b")
+            os.mkdir(self.dir)
+            self.unicodefn = []
+            for fn in self.filenames:
+                f = open(os.path.join(self.bdir, fn), "w")
+                f.close()
+                self.unicodefn.append(fn.decode("utf-8", "utf8b"))
+
+        def tearDown(self):
+            shutil.rmtree(self.dir)
+            sys.setfilesystemencoding(self.fsencoding)
+
+        def test_listdir(self):
+            expected = set(self.unicodefn)
+            found = set(os.listdir(support.TESTFN))
+            self.assertEquals(found, expected)
+
+        def test_open(self):
+            for fn in self.unicodefn:
+                f = open(os.path.join(self.dir, fn))
+                f.close()
+
+        def test_stat(self):
+            for fn in self.unicodefn:
+                os.stat(os.path.join(self.dir, fn))
 else:
     class PosixUidGidTests(unittest.TestCase):
         pass
+    class Pep383Tests(unittest.TestCase):
+        pass
 
 def test_main():
     support.run_unittest(
@@ -714,7 +750,8 @@ def test_main():
         ExecTests,
         Win32ErrorTests,
         TestInvalidFD,
-        PosixUidGidTests
+        PosixUidGidTests,
+        Pep383Tests
     )
 
 if __name__ == "__main__":
index a384c41e7c18edee51e756536ae2956847196cc3..2e4c6bd84c79fcfcdcb6406f72b037b64d3dd0aa 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 3.1 beta 1?
 Core and Builtins
 -----------------
 
+- Implement PEP 383, Non-decodable Bytes in System Character Interfaces.
+
 - Issue #5890: in subclasses of 'property' the __doc__ attribute was
   shadowed by classtype's, even if it was None.  property now
   inserts the __doc__ into the subclass instance __dict__.
index 4499ee242cf28b6d28ad164d2c468fcdf405a1c7..164f7e46d1b6b20de4aac096087ed4ba9280ea79 100644 (file)
@@ -245,7 +245,7 @@ fileio_init(PyObject *oself, PyObject *args, PyObject *kwds)
                                return -1;
 
                        stringobj = PyUnicode_AsEncodedString(
-                               u, Py_FileSystemDefaultEncoding, NULL);
+                               u, Py_FileSystemDefaultEncoding, "utf8b");
                        Py_DECREF(u);
                        if (stringobj == NULL)
                                return -1;
index 0575be23bba1e5afb45bad6b635f10891103551a..d38a4dbefd7489c467da1c193592ba4db94d88b3 100644 (file)
@@ -493,12 +493,14 @@ convertenviron(void)
                char *p = strchr(*e, '=');
                if (p == NULL)
                        continue;
-               k = PyUnicode_FromStringAndSize(*e, (int)(p-*e));
+               k = PyUnicode_Decode(*e, (int)(p-*e),
+                                    Py_FileSystemDefaultEncoding, "utf8b");
                if (k == NULL) {
                        PyErr_Clear();
                        continue;
                }
-               v = PyUnicode_FromString(p+1);
+               v = PyUnicode_Decode(p+1, strlen(p+1),
+                                    Py_FileSystemDefaultEncoding, "utf8b");
                if (v == NULL) {
                        PyErr_Clear();
                        Py_DECREF(k);
@@ -534,6 +536,37 @@ convertenviron(void)
        return d;
 }
 
+/* Return the char* data of a bytes or bytearray object. If lock is true and
+   o is a bytearray, lock its buffer first via PyObject_GetBuffer. */
+
+static char*
+bytes2str(PyObject* o, int lock)
+{
+       if(PyBytes_Check(o))
+               return PyBytes_AsString(o);
+       else if(PyByteArray_Check(o)) {
+               if (lock && PyObject_GetBuffer(o, NULL, 0) < 0)
+                       /* On a bytearray, this should not fail. */
+                       PyErr_BadInternalCall();
+               return PyByteArray_AsString(o);
+       } else {
+               /* The FS converter should have verified that this
+                  is either bytes or bytearray. */
+               Py_FatalError("bad object passed to bytes2str");
+               /* not reached. */
+               return "";
+       }
+}
+
+/* Undo bytes2str(o, 1): release the bytearray buffer lock, then decref o. */
+static void
+release_bytes(PyObject* o)
+{
+       if (PyByteArray_Check(o))
+               o->ob_type->tp_as_buffer->bf_releasebuffer(o, NULL); /* exporter is o, not NULL */
+       Py_DECREF(o);
+}
+
 
 /* Set a POSIX-specific error from errno, and return NULL */
 
@@ -558,10 +591,11 @@ posix_error_with_unicode_filename(Py_UNICODE* name)
 
 
 static PyObject *
-posix_error_with_allocated_filename(char* name)
+posix_error_with_allocated_filename(PyObject* name)
 {
-       PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, name);
-       PyMem_Free(name);
+       PyObject *rc = PyErr_SetFromErrnoWithFilename(PyExc_OSError, 
+                                                     bytes2str(name, 0));
+       release_bytes(name);
        return rc;
 }
 
@@ -728,17 +762,19 @@ unicode_file_names(void)
 static PyObject *
 posix_1str(PyObject *args, char *format, int (*func)(const char*))
 {
-       char *path1 = NULL;
+       PyObject *opath1 = NULL;
+       char *path1;
        int res;
        if (!PyArg_ParseTuple(args, format,
-                             Py_FileSystemDefaultEncoding, &path1))
+                             PyUnicode_FSConverter, &opath1))
                return NULL;
+       path1 = bytes2str(opath1, 1);
        Py_BEGIN_ALLOW_THREADS
        res = (*func)(path1);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path1);
-       PyMem_Free(path1);
+               return posix_error_with_allocated_filename(opath1);
+       release_bytes(opath1);
        Py_INCREF(Py_None);
        return Py_None;
 }
@@ -748,17 +784,20 @@ posix_2str(PyObject *args,
           char *format,
           int (*func)(const char *, const char *))
 {
-       char *path1 = NULL, *path2 = NULL;
+       PyObject *opath1, *opath2;
+       char *path1, *path2;
        int res;
        if (!PyArg_ParseTuple(args, format,
-                             Py_FileSystemDefaultEncoding, &path1,
-                             Py_FileSystemDefaultEncoding, &path2))
+                             PyUnicode_FSConverter, &opath1,
+                             PyUnicode_FSConverter, &opath2))
                return NULL;
+       path1 = bytes2str(opath1, 1);
+       path2 = bytes2str(opath2, 1);
        Py_BEGIN_ALLOW_THREADS
        res = (*func)(path1, path2);
        Py_END_ALLOW_THREADS
-       PyMem_Free(path1);
-       PyMem_Free(path2);
+       release_bytes(opath1);
+       release_bytes(opath2);
        if (res != 0)
                /* XXX how to report both path1 and path2??? */
                return posix_error();
@@ -1560,8 +1599,8 @@ posix_do_stat(PyObject *self, PyObject *args,
              int (*wstatfunc)(const Py_UNICODE *, STRUCT_STAT *))
 {
        STRUCT_STAT st;
-       char *path = NULL;      /* pass this to stat; do not free() it */
-       char *pathfree = NULL;  /* this memory must be free'd */
+       PyObject *opath;
+       char *path;
        int res;
        PyObject *result;
 
@@ -1590,25 +1629,24 @@ posix_do_stat(PyObject *self, PyObject *args,
 #endif
 
        if (!PyArg_ParseTuple(args, format,
-                             Py_FileSystemDefaultEncoding, &path))
+                             PyUnicode_FSConverter, &opath))
                return NULL;
-       pathfree = path;
-
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = (*statfunc)(path, &st);
        Py_END_ALLOW_THREADS
 
        if (res != 0) {
 #ifdef MS_WINDOWS
-               result = win32_error("stat", pathfree);
+               result = win32_error("stat", path);
 #else
-               result = posix_error_with_filename(pathfree);
+               result = posix_error_with_filename(path);
 #endif
        } 
        else
                result = _pystat_fromstructstat(&st);
 
-       PyMem_Free(pathfree);
+       release_bytes(opath);
        return result;
 }
 
@@ -1625,6 +1663,7 @@ existence, or the inclusive-OR of R_OK, W_OK, and X_OK.");
 static PyObject *
 posix_access(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        int mode;
        
@@ -1644,13 +1683,14 @@ posix_access(PyObject *self, PyObject *args)
                   are also valid. */
                PyErr_Clear();
        }
-       if (!PyArg_ParseTuple(args, "eti:access",
-                             Py_FileSystemDefaultEncoding, &path, &mode))
+       if (!PyArg_ParseTuple(args, "O&i:access",
+                             PyUnicode_FSConverter, &opath, &mode))
                return 0;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        attr = GetFileAttributesA(path);
        Py_END_ALLOW_THREADS
-       PyMem_Free(path);
+       release_bytes(opath);
 finish:
        if (attr == 0xFFFFFFFF)
                /* File does not exist, or cannot read attributes */
@@ -1663,13 +1703,14 @@ finish:
                               || (attr & FILE_ATTRIBUTE_DIRECTORY));
 #else
        int res;
-       if (!PyArg_ParseTuple(args, "eti:access", 
-                             Py_FileSystemDefaultEncoding, &path, &mode))
+       if (!PyArg_ParseTuple(args, "O&i:access", 
+                             PyUnicode_FSConverter, &opath, &mode))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = access(path, mode);
        Py_END_ALLOW_THREADS
-       PyMem_Free(path);
+       release_bytes(opath);
        return PyBool_FromLong(res == 0);
 #endif
 }
@@ -1750,11 +1791,11 @@ posix_chdir(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
        return win32_1str(args, "chdir", "y:chdir", win32_chdir, "U:chdir", win32_wchdir);
 #elif defined(PYOS_OS2) && defined(PYCC_GCC)
-       return posix_1str(args, "et:chdir", _chdir2);
+       return posix_1str(args, "O&:chdir", _chdir2);
 #elif defined(__VMS)
-       return posix_1str(args, "et:chdir", (int (*)(const char *))chdir);
+       return posix_1str(args, "O&:chdir", (int (*)(const char *))chdir);
 #else
-       return posix_1str(args, "et:chdir", chdir);
+       return posix_1str(args, "O&:chdir", chdir);
 #endif
 }
 
@@ -1779,6 +1820,7 @@ Change the access permissions of a file.");
 static PyObject *
 posix_chmod(PyObject *self, PyObject *args)
 {
+       PyObject *opath = NULL;
        char *path = NULL;
        int i;
        int res;
@@ -1809,9 +1851,10 @@ posix_chmod(PyObject *self, PyObject *args)
                   are also valid. */
                PyErr_Clear();
        }
-       if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-                             &path, &i))
+       if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+                             &opath, &i))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        attr = GetFileAttributesA(path);
        if (attr != 0xFFFFFFFF) {
@@ -1826,22 +1869,23 @@ posix_chmod(PyObject *self, PyObject *args)
        Py_END_ALLOW_THREADS
        if (!res) {
                win32_error("chmod", path);
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
-       PyMem_Free(path);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 #else /* Py_WIN_WIDE_FILENAMES */
-       if (!PyArg_ParseTuple(args, "eti:chmod", Py_FileSystemDefaultEncoding,
-                             &path, &i))
+       if (!PyArg_ParseTuple(args, "O&i:chmod", PyUnicode_FSConverter,
+                             &opath, &i))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = chmod(path, i);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 #endif
@@ -1877,18 +1921,20 @@ affects the link itself rather than the target.");
 static PyObject *
 posix_lchmod(PyObject *self, PyObject *args)
 {
-       char *path = NULL;
+       PyObject *opath;
+       char *path;
        int i;
        int res;
-       if (!PyArg_ParseTuple(args, "eti:lchmod", Py_FileSystemDefaultEncoding,
-                             &path, &i))
+       if (!PyArg_ParseTuple(args, "O&i:lchmod", PyUnicode_FSConverter,
+                             &opath, &i))
                return NULL;
+       path = bytes2str(opath, 1)
        Py_BEGIN_ALLOW_THREADS
        res = lchmod(path, i);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_RETURN_NONE;
 }
 #endif /* HAVE_LCHMOD */
@@ -1902,18 +1948,20 @@ Set file flags.");
 static PyObject *
 posix_chflags(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        unsigned long flags;
        int res;
-       if (!PyArg_ParseTuple(args, "etk:chflags",
-                             Py_FileSystemDefaultEncoding, &path, &flags))
+       if (!PyArg_ParseTuple(args, "O&k:chflags",
+                             PyUnicode_FSConverter, &opath, &flags))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = chflags(path, flags);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 }
@@ -1928,18 +1976,20 @@ This function will not follow symbolic links.");
 static PyObject *
 posix_lchflags(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        unsigned long flags;
        int res;
-       if (!PyArg_ParseTuple(args, "etk:lchflags",
-                             Py_FileSystemDefaultEncoding, &path, &flags))
+       if (!PyArg_ParseTuple(args, "O&k:lchflags",
+                             PyUnicode_FSConverter, &path, &flags))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = lchflags(path, flags);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 }
@@ -1953,7 +2003,7 @@ Change root directory to path.");
 static PyObject *
 posix_chroot(PyObject *self, PyObject *args)
 {
-       return posix_1str(args, "et:chroot", chroot);
+       return posix_1str(args, "O&:chroot", chroot);
 }
 #endif
 
@@ -1996,19 +2046,21 @@ Change the owner and group id of path to the numeric uid and gid.");
 static PyObject *
 posix_chown(PyObject *self, PyObject *args)
 {
-       char *path = NULL;
+       PyObject *opath;
+       char *path;
        long uid, gid;
        int res;
-       if (!PyArg_ParseTuple(args, "etll:chown",
-                             Py_FileSystemDefaultEncoding, &path,
+       if (!PyArg_ParseTuple(args, "O&ll:chown",
+                             PyUnicode_FSConverter, &opath,
                              &uid, &gid))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = chown(path, (uid_t) uid, (gid_t) gid);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 }
@@ -2045,19 +2097,21 @@ This function will not follow symbolic links.");
 static PyObject *
 posix_lchown(PyObject *self, PyObject *args)
 {
-       char *path = NULL;
+       PyObject *opath;
+       char *path;
        int uid, gid;
        int res;
-       if (!PyArg_ParseTuple(args, "etii:lchown",
-                             Py_FileSystemDefaultEncoding, &path,
+       if (!PyArg_ParseTuple(args, "O&ii:lchown",
+                             PyUnicode_FSConverter, &opath,
                              &uid, &gid))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        res = lchown(path, (uid_t) uid, (gid_t) gid);
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 }
@@ -2113,7 +2167,7 @@ posix_getcwd(int use_bytes)
                return posix_error();
        if (use_bytes)
                return PyBytes_FromStringAndSize(buf, strlen(buf));
-       return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"strict");
+       return PyUnicode_Decode(buf, strlen(buf), Py_FileSystemDefaultEncoding,"utf8b");
 }
 
 PyDoc_STRVAR(posix_getcwd__doc__,
@@ -2146,7 +2200,7 @@ Create a hard link to a file.");
 static PyObject *
 posix_link(PyObject *self, PyObject *args)
 {
-       return posix_2str(args, "etet:link", link);
+       return posix_2str(args, "O&O&:link", link);
 }
 #endif /* HAVE_LINK */
 
@@ -2171,6 +2225,7 @@ posix_listdir(PyObject *self, PyObject *args)
        HANDLE hFindFile;
        BOOL result;
        WIN32_FIND_DATA FileData;
+       PyObject *opath;
        char namebuf[MAX_PATH+5]; /* Overallocate for \\*.*\0 */
        char *bufptr = namebuf;
        Py_ssize_t len = sizeof(namebuf)-5; /* only claim to have space for MAX_PATH */
@@ -2260,9 +2315,16 @@ posix_listdir(PyObject *self, PyObject *args)
        }
 #endif
 
-       if (!PyArg_ParseTuple(args, "et#:listdir",
-                             Py_FileSystemDefaultEncoding, &bufptr, &len))
+       if (!PyArg_ParseTuple(args, "O&:listdir",
+                             PyUnicode_FSConverter, &opath))
+               return NULL;
+       if (PyObject_Size(opath)+1 > MAX_PATH) {
+               PyErr_SetString(PyExc_ValueError, "path too long");
+               Py_DECREF(opath);
                return NULL;
+       }
+       strcpy(namebuf, bytes2str(opath, 0));
+       len = PyObject_Size(opath);
        if (len > 0) {
                char ch = namebuf[len-1];
                if (ch != SEP && ch != ALTSEP && ch != ':')
@@ -2324,6 +2386,7 @@ posix_listdir(PyObject *self, PyObject *args)
 #ifndef MAX_PATH
 #define MAX_PATH    CCHMAXPATH
 #endif
+    PyObject *oname;
     char *name, *pt;
     Py_ssize_t len;
     PyObject *d, *v;
@@ -2333,11 +2396,13 @@ posix_listdir(PyObject *self, PyObject *args)
     FILEFINDBUF3   ep;
     APIRET rc;
 
-    if (!PyArg_ParseTuple(args, "et#:listdir", 
-                          Py_FileSystemDefaultEncoding, &name, &len))
+    if (!PyArg_ParseTuple(args, "O&:listdir", 
+                          PyUnicode_FSConverter, &oname))
         return NULL;
+    name = bytes2str(oname);
+    len = PyObject_Size(oname);
     if (len >= MAX_PATH) {
-        PyMem_Free(name);
+        release_bytes(oname);
         PyErr_SetString(PyExc_ValueError, "path too long");
         return NULL;
     }
@@ -2350,7 +2415,7 @@ posix_listdir(PyObject *self, PyObject *args)
     strcpy(namebuf + len, "*.*");
 
     if ((d = PyList_New(0)) == NULL) {
-        PyMem_Free(name);
+        release_bytes(oname);
         return NULL;
     }
 
@@ -2363,7 +2428,7 @@ posix_listdir(PyObject *self, PyObject *args)
 
     if (rc != NO_ERROR) {
         errno = ENOENT;
-        return posix_error_with_allocated_filename(name);
+        return posix_error_with_allocated_filename(oname);
     }
 
     if (srchcnt > 0) { /* If Directory is NOT Totally Empty, */
@@ -2393,11 +2458,11 @@ posix_listdir(PyObject *self, PyObject *args)
         } while (DosFindNext(hdir, &ep, sizeof(ep), &srchcnt) == NO_ERROR && srchcnt > 0);
     }
 
-    PyMem_Free(name);
+    release_bytes(oname);
     return d;
 #else
-
-       char *name = NULL;
+       PyObject *oname;
+       char *name;
        PyObject *d, *v;
        DIR *dirp;
        struct dirent *ep;
@@ -2408,14 +2473,15 @@ posix_listdir(PyObject *self, PyObject *args)
                arg_is_unicode = 0;
                PyErr_Clear();
        }
-       if (!PyArg_ParseTuple(args, "et:listdir", Py_FileSystemDefaultEncoding, &name))
+       if (!PyArg_ParseTuple(args, "O&:listdir", PyUnicode_FSConverter, &oname))
                return NULL;
+       name = bytes2str(oname, 1);
        if ((dirp = opendir(name)) == NULL) {
-               return posix_error_with_allocated_filename(name);
+               return posix_error_with_allocated_filename(oname);
        }
        if ((d = PyList_New(0)) == NULL) {
                closedir(dirp);
-               PyMem_Free(name);
+               release_bytes(oname);
                return NULL;
        }
        for (;;) {
@@ -2429,7 +2495,7 @@ posix_listdir(PyObject *self, PyObject *args)
                        } else {
                                closedir(dirp);
                                Py_DECREF(d);
-                               return posix_error_with_allocated_filename(name);
+                               return posix_error_with_allocated_filename(oname);
                        }
                }
                if (ep->d_name[0] == '.' &&
@@ -2447,18 +2513,16 @@ posix_listdir(PyObject *self, PyObject *args)
 
                        w = PyUnicode_FromEncodedObject(v,
                                        Py_FileSystemDefaultEncoding,
-                                       "strict");
-                       if (w != NULL) {
-                               Py_DECREF(v);
+                                       "utf8b");
+                       Py_DECREF(v);
+                       if (w != NULL)
                                v = w;
-                       }
                        else {
-                               /* Ignore undecodable filenames, as discussed
-                                * in issue 3187. To include these,
-                                * use getcwdb(). */
-                               PyErr_Clear();
-                               Py_DECREF(v);
-                               continue;
+                               /* Encoding failed to decode ASCII bytes.
+                                  Raise exception. */
+                               Py_DECREF(d);
+                               d = NULL;
+                               break;
                        }
                }
                if (PyList_Append(d, v) != 0) {
@@ -2470,7 +2534,7 @@ posix_listdir(PyObject *self, PyObject *args)
                Py_DECREF(v);
        }
        closedir(dirp);
-       PyMem_Free(name);
+       release_bytes(oname);
 
        return d;
 
@@ -2482,10 +2546,8 @@ posix_listdir(PyObject *self, PyObject *args)
 static PyObject *
 posix__getfullpathname(PyObject *self, PyObject *args)
 {
-       /* assume encoded strings won't more than double no of chars */
-       char inbuf[MAX_PATH*2];
-       char *inbufp = inbuf;
-       Py_ssize_t insize = sizeof(inbuf);
+       PyObject *opath;
+       char *path;
        char outbuf[MAX_PATH*2];
        char *temp;
 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2519,13 +2581,17 @@ posix__getfullpathname(PyObject *self, PyObject *args)
                PyErr_Clear();
        }
 #endif
-       if (!PyArg_ParseTuple (args, "et#:_getfullpathname",
-                              Py_FileSystemDefaultEncoding, &inbufp,
-                              &insize))
+       if (!PyArg_ParseTuple (args, "O&:_getfullpathname",
+                              PyUnicode_FSConverter, &opath))
                return NULL;
-       if (!GetFullPathName(inbuf, sizeof(outbuf)/sizeof(outbuf[0]),
-                            outbuf, &temp))
-               return win32_error("GetFullPathName", inbuf);
+       path = bytes2str(opath, 1);
+       if (!GetFullPathName(path, sizeof(outbuf)/sizeof(outbuf[0]),
+                            outbuf, &temp)) {
+               win32_error("GetFullPathName", path);
+               release_bytes(opath);
+               return NULL;
+       }
+       release_bytes(opath);
        if (PyUnicode_Check(PyTuple_GetItem(args, 0))) {
                return PyUnicode_Decode(outbuf, strlen(outbuf),
                        Py_FileSystemDefaultEncoding, NULL);
@@ -2542,7 +2608,8 @@ static PyObject *
 posix_mkdir(PyObject *self, PyObject *args)
 {
        int res;
-       char *path = NULL;
+       PyObject *opath;
+       char *path;
        int mode = 0777;
 
 #ifdef Py_WIN_WIDE_FILENAMES
@@ -2563,9 +2630,10 @@ posix_mkdir(PyObject *self, PyObject *args)
                   are also valid. */
                PyErr_Clear();
        }
-       if (!PyArg_ParseTuple(args, "et|i:mkdir",
-                             Py_FileSystemDefaultEncoding, &path, &mode))
+       if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+                             PyUnicode_FSConverter, &opath, &mode))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
        /* PyUnicode_AS_UNICODE OK without thread lock as
           it is a simple dereference. */
@@ -2573,17 +2641,18 @@ posix_mkdir(PyObject *self, PyObject *args)
        Py_END_ALLOW_THREADS
        if (!res) {
                win32_error("mkdir", path);
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
-       PyMem_Free(path);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 #else
 
-       if (!PyArg_ParseTuple(args, "et|i:mkdir",
-                             Py_FileSystemDefaultEncoding, &path, &mode))
+       if (!PyArg_ParseTuple(args, "O&|i:mkdir",
+                             PyUnicode_FSConverter, &opath, &mode))
                return NULL;
+       path = bytes2str(opath, 1);
        Py_BEGIN_ALLOW_THREADS
 #if ( defined(__WATCOMC__) || defined(PYCC_VACPP) ) && !defined(__QNX__)
        res = mkdir(path);
@@ -2592,8 +2661,8 @@ posix_mkdir(PyObject *self, PyObject *args)
 #endif
        Py_END_ALLOW_THREADS
        if (res < 0)
-               return posix_error_with_allocated_filename(path);
-       PyMem_Free(path);
+               return posix_error_with_allocated_filename(opath);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 #endif
@@ -2685,7 +2754,7 @@ error:
        Py_INCREF(Py_None);
        return Py_None;
 #else
-       return posix_2str(args, "etet:rename", rename);
+       return posix_2str(args, "O&O&:rename", rename);
 #endif
 }
 
@@ -2700,7 +2769,7 @@ posix_rmdir(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
        return win32_1str(args, "rmdir", "y:rmdir", RemoveDirectoryA, "U:rmdir", RemoveDirectoryW);
 #else
-       return posix_1str(args, "et:rmdir", rmdir);
+       return posix_1str(args, "O&:rmdir", rmdir);
 #endif
 }
 
@@ -2713,9 +2782,9 @@ static PyObject *
 posix_stat(PyObject *self, PyObject *args)
 {
 #ifdef MS_WINDOWS
-       return posix_do_stat(self, args, "et:stat", STAT, "U:stat", win32_wstat);
+       return posix_do_stat(self, args, "O&:stat", STAT, "U:stat", win32_wstat);
 #else
-       return posix_do_stat(self, args, "et:stat", STAT, NULL, NULL);
+       return posix_do_stat(self, args, "O&:stat", STAT, NULL, NULL);
 #endif
 }
 
@@ -2781,7 +2850,7 @@ posix_unlink(PyObject *self, PyObject *args)
 #ifdef MS_WINDOWS
        return win32_1str(args, "remove", "y:remove", DeleteFileA, "U:remove", DeleteFileW);
 #else
-       return posix_1str(args, "et:remove", unlink);
+       return posix_1str(args, "O&:remove", unlink);
 #endif
 }
 
@@ -2853,7 +2922,8 @@ posix_utime(PyObject *self, PyObject *args)
        PyObject *arg;
        PyUnicodeObject *obwpath;
        wchar_t *wpath = NULL;
-       char *apath = NULL;
+       PyObject *oapath;
+       char *apath;
        HANDLE hFile;
        long atimesec, mtimesec, ausec, musec;
        FILETIME atime, mtime;
@@ -2875,9 +2945,10 @@ posix_utime(PyObject *self, PyObject *args)
                        PyErr_Clear();
        }
        if (!wpath) {
-               if (!PyArg_ParseTuple(args, "etO:utime",
-                               Py_FileSystemDefaultEncoding, &apath, &arg))
+               if (!PyArg_ParseTuple(args, "O&O:utime",
+                               PyUnicode_FSConverter, &oapath, &arg))
                        return NULL;
+               apath = bytes2str(oapath, 1);
                Py_BEGIN_ALLOW_THREADS
                hFile = CreateFileA(apath, FILE_WRITE_ATTRIBUTES, 0,
                                    NULL, OPEN_EXISTING,
@@ -2885,10 +2956,10 @@ posix_utime(PyObject *self, PyObject *args)
                Py_END_ALLOW_THREADS
                if (hFile == INVALID_HANDLE_VALUE) {
                        win32_error("utime", apath);
-                       PyMem_Free(apath);
+                       release_bytes(oapath);
                        return NULL;
                }
-               PyMem_Free(apath);
+               release_bytes(oapath);
        }
        
        if (arg == Py_None) {
@@ -2929,7 +3000,8 @@ done:
        return result;
 #else /* Py_WIN_WIDE_FILENAMES */
 
-       char *path = NULL;
+       PyObject *opath;
+       char *path;
        long atime, mtime, ausec, musec;
        int res;
        PyObject* arg;
@@ -2952,9 +3024,10 @@ done:
 #endif /* HAVE_UTIMES */
 
 
-       if (!PyArg_ParseTuple(args, "etO:utime",
-                                 Py_FileSystemDefaultEncoding, &path, &arg))
+       if (!PyArg_ParseTuple(args, "O&O:utime",
+                                 PyUnicode_FSConverter, &opath, &arg))
                return NULL;
+       path = bytes2str(opath, 1);
        if (arg == Py_None) {
                /* optional time values not given */
                Py_BEGIN_ALLOW_THREADS
@@ -2964,18 +3037,18 @@ done:
        else if (!PyTuple_Check(arg) || PyTuple_Size(arg) != 2) {
                PyErr_SetString(PyExc_TypeError,
                                "utime() arg 2 must be a tuple (atime, mtime)");
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
        else {
                if (extract_time(PyTuple_GET_ITEM(arg, 0),
                                 &atime, &ausec) == -1) {
-                       PyMem_Free(path);
+                       release_bytes(opath);
                        return NULL;
                }
                if (extract_time(PyTuple_GET_ITEM(arg, 1),
                                 &mtime, &musec) == -1) {
-                       PyMem_Free(path);
+                       release_bytes(opath);
                        return NULL;
                }
                ATIME = atime;
@@ -2993,9 +3066,9 @@ done:
 #endif /* HAVE_UTIMES */
        }
        if (res < 0) {
-               return posix_error_with_allocated_filename(path);
+               return posix_error_with_allocated_filename(opath);
        }
-       PyMem_Free(path);
+       release_bytes(opath);
        Py_INCREF(Py_None);
        return Py_None;
 #undef UTIME_ARG
@@ -3030,6 +3103,33 @@ free_string_array(char **array, Py_ssize_t count)
                PyMem_Free(array[i]);
        PyMem_DEL(array);
 }
+
+/* Convert o with PyUnicode_FSConverter and store a newly allocated copy
+   of the resulting byte string in *out.
+   Returns 1 on success; *out then comes from PyMem_Malloc() and is
+   released by free_string_array().  Returns 0 on failure, in which
+   case *out does not hold an allocation that needs freeing. */
+int fsconvert_strdup(PyObject *o, char**out)
+{
+       PyObject *bytes;
+       Py_ssize_t size;
+       if (!PyUnicode_FSConverter(o, &bytes))
+               return 0;
+       size = PyObject_Size(bytes);
+       *out = PyMem_Malloc(size+1);
+       if (!*out) {
+               /* Drop the converter's reference and report the failure;
+                  otherwise bytes leaks and no exception is set. */
+               Py_DECREF(bytes);
+               PyErr_NoMemory();
+               return 0;
+       }
+       /* Don't lock bytes, as we hold the GIL */
+       /* size+1: presumably also copies the terminating NUL byte */
+       memcpy(*out, bytes2str(bytes, 0), size+1);
+       Py_DECREF(bytes);
+       return 1;
+}
 #endif
 
 
@@ -3044,6 +3133,7 @@ Execute an executable path with arguments, replacing current process.\n\
 static PyObject *
 posix_execv(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        PyObject *argv;
        char **argvlist;
@@ -3053,10 +3143,11 @@ posix_execv(PyObject *self, PyObject *args)
        /* execv has two arguments: (path, argv), where
           argv is a list or tuple of strings. */
 
-       if (!PyArg_ParseTuple(args, "etO:execv",
-                              Py_FileSystemDefaultEncoding,
-                              &path, &argv))
+       if (!PyArg_ParseTuple(args, "O&O:execv",
+                              PyUnicode_FSConverter,
+                              &opath, &argv))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3067,28 +3158,27 @@ posix_execv(PyObject *self, PyObject *args)
        }
        else {
                PyErr_SetString(PyExc_TypeError, "execv() arg 2 must be a tuple or list");
-                PyMem_Free(path);
+                release_bytes(opath);
                return NULL;
        }
        if (argc < 1) {
                PyErr_SetString(PyExc_ValueError, "execv() arg 2 must not be empty");
-                PyMem_Free(path);
+                release_bytes(opath);
                return NULL;
        }
 
        argvlist = PyMem_NEW(char *, argc+1);
        if (argvlist == NULL) {
-               PyMem_Free(path);
+               release_bytes(opath);
                return PyErr_NoMemory();
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i), "et",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i])) {
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i])) {
                        free_string_array(argvlist, i);
                        PyErr_SetString(PyExc_TypeError,
                                        "execv() arg 2 must contain only strings");
-                       PyMem_Free(path);
+                       release_bytes(opath);
                        return NULL;
 
                }
@@ -3100,7 +3190,7 @@ posix_execv(PyObject *self, PyObject *args)
        /* If we get here it's definitely an error */
 
        free_string_array(argvlist, argc);
-       PyMem_Free(path);
+       release_bytes(opath);
        return posix_error();
 }
 
@@ -3116,6 +3206,7 @@ Execute a path with arguments and environment, replacing current process.\n\
 static PyObject *
 posix_execve(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        PyObject *argv, *env;
        char **argvlist;
@@ -3129,10 +3220,11 @@ posix_execve(PyObject *self, PyObject *args)
           argv is a list or tuple of strings and env is a dictionary
           like posix.environ. */
 
-       if (!PyArg_ParseTuple(args, "etOO:execve",
-                             Py_FileSystemDefaultEncoding,
-                             &path, &argv, &env))
+       if (!PyArg_ParseTuple(args, "O&OO:execve",
+                             PyUnicode_FSConverter,
+                             &opath, &argv, &env))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3158,10 +3250,8 @@ posix_execve(PyObject *self, PyObject *args)
                goto fail_0;
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i),
-                                "et;execve() arg 2 must contain only strings",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i]))
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i]))
                {
                        lastarg = i;
                        goto fail_1;
@@ -3243,7 +3333,7 @@ posix_execve(PyObject *self, PyObject *args)
        Py_XDECREF(vals);
        Py_XDECREF(keys);
   fail_0:
-       PyMem_Free(path);
+       release_bytes(opath);
        return NULL;
 }
 #endif /* HAVE_EXECV */
@@ -3261,6 +3351,7 @@ Execute the program 'path' in a new process.\n\
 static PyObject *
 posix_spawnv(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        PyObject *argv;
        char **argvlist;
@@ -3272,10 +3363,11 @@ posix_spawnv(PyObject *self, PyObject *args)
        /* spawnv has three arguments: (mode, path, argv), where
           argv is a list or tuple of strings. */
 
-       if (!PyArg_ParseTuple(args, "ietO:spawnv", &mode,
-                             Py_FileSystemDefaultEncoding,
-                             &path, &argv))
+       if (!PyArg_ParseTuple(args, "iO&O:spawnv", &mode,
+                             PyUnicode_FSConverter,
+                             &opath, &argv))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3287,24 +3379,23 @@ posix_spawnv(PyObject *self, PyObject *args)
        else {
                PyErr_SetString(PyExc_TypeError,
                                "spawnv() arg 2 must be a tuple or list");
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
 
        argvlist = PyMem_NEW(char *, argc+1);
        if (argvlist == NULL) {
-               PyMem_Free(path);
+               release_bytes(opath);
                return PyErr_NoMemory();
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i), "et",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i])) {
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i])) {
                        free_string_array(argvlist, i);
                        PyErr_SetString(
                                PyExc_TypeError,
                                "spawnv() arg 2 must contain only strings");
-                       PyMem_Free(path);
+                       release_bytes(opath);
                        return NULL;
                }
        }
@@ -3324,7 +3415,7 @@ posix_spawnv(PyObject *self, PyObject *args)
 #endif
 
        free_string_array(argvlist, argc);
-       PyMem_Free(path);
+       release_bytes(opath);
 
        if (spawnval == -1)
                return posix_error();
@@ -3349,6 +3440,7 @@ Execute the program 'path' in a new process.\n\
 static PyObject *
 posix_spawnve(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        PyObject *argv, *env;
        char **argvlist;
@@ -3364,10 +3456,11 @@ posix_spawnve(PyObject *self, PyObject *args)
           argv is a list or tuple of strings and env is a dictionary
           like posix.environ. */
 
-       if (!PyArg_ParseTuple(args, "ietOO:spawnve", &mode,
-                             Py_FileSystemDefaultEncoding,
-                             &path, &argv, &env))
+       if (!PyArg_ParseTuple(args, "iO&OO:spawnve", &mode,
+                             PyUnicode_FSConverter,
+                             &opath, &argv, &env))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3393,10 +3486,8 @@ posix_spawnve(PyObject *self, PyObject *args)
                goto fail_0;
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i),
-                            "et;spawnve() arg 2 must contain only strings",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i]))
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i]))
                {
                        lastarg = i;
                        goto fail_1;
@@ -3486,7 +3577,7 @@ posix_spawnve(PyObject *self, PyObject *args)
        Py_XDECREF(vals);
        Py_XDECREF(keys);
   fail_0:
-       PyMem_Free(path);
+       release_bytes(opath);
        return res;
 }
 
@@ -3504,6 +3595,7 @@ search path to find the file.\n\
 static PyObject *
 posix_spawnvp(PyObject *self, PyObject *args)
 {
+       PyObject *opath;
        char *path;
        PyObject *argv;
        char **argvlist;
@@ -3514,10 +3606,11 @@ posix_spawnvp(PyObject *self, PyObject *args)
        /* spawnvp has three arguments: (mode, path, argv), where
           argv is a list or tuple of strings. */
 
-       if (!PyArg_ParseTuple(args, "ietO:spawnvp", &mode,
-                             Py_FileSystemDefaultEncoding,
-                             &path, &argv))
+       if (!PyArg_ParseTuple(args, "iO&O:spawnvp", &mode,
+                             PyUnicode_FSConverter,
+                             &opath, &argv))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3529,24 +3622,23 @@ posix_spawnvp(PyObject *self, PyObject *args)
        else {
                PyErr_SetString(PyExc_TypeError,
                                "spawnvp() arg 2 must be a tuple or list");
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
 
        argvlist = PyMem_NEW(char *, argc+1);
        if (argvlist == NULL) {
-               PyMem_Free(path);
+               release_bytes(opath);
                return PyErr_NoMemory();
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i), "et",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i])) {
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i])) {
                        free_string_array(argvlist, i);
                        PyErr_SetString(
                                PyExc_TypeError,
                                "spawnvp() arg 2 must contain only strings");
-                       PyMem_Free(path);
+                       release_bytes(opath);
                        return NULL;
                }
        }
@@ -3561,7 +3653,7 @@ posix_spawnvp(PyObject *self, PyObject *args)
        Py_END_ALLOW_THREADS
 
        free_string_array(argvlist, argc);
-       PyMem_Free(path);
+       release_bytes(opath);
 
        if (spawnval == -1)
                return posix_error();
@@ -3583,6 +3675,7 @@ search path to find the file.\n\
 static PyObject *
 posix_spawnvpe(PyObject *self, PyObject *args)
 {
+       PyObject *opath
        char *path;
        PyObject *argv, *env;
        char **argvlist;
@@ -3598,9 +3691,10 @@ posix_spawnvpe(PyObject *self, PyObject *args)
           like posix.environ. */
 
-       if (!PyArg_ParseTuple(args, "ietOO:spawnvpe", &mode,
-                             Py_FileSystemDefaultEncoding,
-                             &path, &argv, &env))
+       if (!PyArg_ParseTuple(args, "iO&OO:spawnvpe", &mode,
+                             PyUnicode_FSConverter,
+                             &opath, &argv, &env))
                return NULL;
+       path = bytes2str(opath, 1);
        if (PyList_Check(argv)) {
                argc = PyList_Size(argv);
                getitem = PyList_GetItem;
@@ -3626,10 +3720,8 @@ posix_spawnvpe(PyObject *self, PyObject *args)
                goto fail_0;
        }
        for (i = 0; i < argc; i++) {
-               if (!PyArg_Parse((*getitem)(argv, i),
-                            "et;spawnvpe() arg 2 must contain only strings",
-                                Py_FileSystemDefaultEncoding,
-                                &argvlist[i]))
+               if (!fsconvert_strdup((*getitem)(argv, i),
+                                     &argvlist[i]))
                {
                        lastarg = i;
                        goto fail_1;
@@ -3710,7 +3802,7 @@ posix_spawnvpe(PyObject *self, PyObject *args)
        Py_XDECREF(vals);
        Py_XDECREF(keys);
   fail_0:
-       PyMem_Free(path);
+       release_bytes(opath);
        return res;
 }
 #endif /* PYOS_OS2 */
@@ -4549,12 +4641,12 @@ static PyObject *
 posix_lstat(PyObject *self, PyObject *args)
 {
 #ifdef HAVE_LSTAT
-       return posix_do_stat(self, args, "et:lstat", lstat, NULL, NULL);
+       return posix_do_stat(self, args, "O&:lstat", lstat, NULL, NULL);
 #else /* !HAVE_LSTAT */
 #ifdef MS_WINDOWS
-       return posix_do_stat(self, args, "et:lstat", STAT, "U:lstat", win32_wstat);
+       return posix_do_stat(self, args, "O&:lstat", STAT, "U:lstat", win32_wstat);
 #else
-       return posix_do_stat(self, args, "et:lstat", STAT, NULL, NULL);
+       return posix_do_stat(self, args, "O&:lstat", STAT, NULL, NULL);
 #endif
 #endif /* !HAVE_LSTAT */
 }
@@ -4570,16 +4662,18 @@ posix_readlink(PyObject *self, PyObject *args)
 {
        PyObject* v;
        char buf[MAXPATHLEN];
+       PyObject *opath;
        char *path;
        int n;
        int arg_is_unicode = 0;
 
-       if (!PyArg_ParseTuple(args, "et:readlink", 
-                               Py_FileSystemDefaultEncoding, &path))
+       if (!PyArg_ParseTuple(args, "O&:readlink", 
+                               PyUnicode_FSConverter, &opath))
                return NULL;
+       path = bytes2str(opath, 1);
        v = PySequence_GetItem(args, 0);
        if (v == NULL) {
-               PyMem_Free(path);
+               release_bytes(opath);
                return NULL;
        }
 
@@ -4592,16 +4686,16 @@ posix_readlink(PyObject *self, PyObject *args)
        n = readlink(path, buf, (int) sizeof buf);
        Py_END_ALLOW_THREADS
        if (n < 0)
-               return posix_error_with_allocated_filename(path);
+               return posix_error_with_allocated_filename(opath);
 
-       PyMem_Free(path);
+       release_bytes(opath);
        v = PyBytes_FromStringAndSize(buf, n);
        if (arg_is_unicode) {
                PyObject *w;
 
                w = PyUnicode_FromEncodedObject(v,
                                Py_FileSystemDefaultEncoding,
-                               "strict");
+                               "utf8b");
                if (w != NULL) {
                        Py_DECREF(v);
                        v = w;
@@ -4623,7 +4717,7 @@ Create a symbolic link pointing to src named dst.");
 static PyObject *
 posix_symlink(PyObject *self, PyObject *args)
 {
-       return posix_2str(args, "etet:symlink", symlink);
+       return posix_2str(args, "O&O&:symlink", symlink);
 }
 #endif /* HAVE_SYMLINK */
 
@@ -4811,7 +4905,8 @@ Open a file (for low level IO).");
 static PyObject *
 posix_open(PyObject *self, PyObject *args)
 {
-       char *file = NULL;
+       PyObject *ofile;
+       char *file;
        int flag;
        int mode = 0777;
        int fd;
@@ -4835,17 +4930,17 @@ posix_open(PyObject *self, PyObject *args)
        }
 #endif
 
-       if (!PyArg_ParseTuple(args, "eti|i",
-                             Py_FileSystemDefaultEncoding, &file,
+       if (!PyArg_ParseTuple(args, "O&i|i",
+                             PyUnicode_FSConverter, &ofile,
                              &flag, &mode))
                return NULL;
-
+       file = bytes2str(ofile, 1);
        Py_BEGIN_ALLOW_THREADS
        fd = open(file, flag, mode);
        Py_END_ALLOW_THREADS
        if (fd < 0)
-               return posix_error_with_allocated_filename(file);
-       PyMem_Free(file);
+               return posix_error_with_allocated_filename(ofile);
+       release_bytes(ofile);
        return PyLong_FromLong((long)fd);
 }
 
@@ -5289,20 +5384,27 @@ posix_putenv(PyObject *self, PyObject *args)
         wchar_t *s1, *s2;
         wchar_t *newenv;
 #else
+       PyObject *os1, *os2;
         char *s1, *s2;
         char *newenv;
 #endif
        PyObject *newstr;
        size_t len;
 
-       if (!PyArg_ParseTuple(args,
 #ifdef MS_WINDOWS
+       if (!PyArg_ParseTuple(args,
                              "uu:putenv",
-#else
-                             "ss:putenv",
-#endif
                              &s1, &s2))
                return NULL;
+#else
+       if (!PyArg_ParseTuple(args,
+                             "O&O&:putenv",
+                             PyUnicode_FSConverter, &os1, 
+                             PyUnicode_FSConverter, &os2))
+               return NULL;
+       s1 = bytes2str(os1, 1);
+       s2 = bytes2str(os2, 1);
+#endif
 
 #if defined(PYOS_OS2)
     if (stricmp(s1, "BEGINLIBPATH") == 0) {
@@ -5345,6 +5447,8 @@ posix_putenv(PyObject *self, PyObject *args)
        PyOS_snprintf(newenv, len, "%s=%s", s1, s2);
        if (putenv(newenv)) {
                 Py_DECREF(newstr);
+               release_bytes(os1);
+               release_bytes(os2);
                 posix_error();
                 return NULL;
        }
@@ -5364,6 +5468,10 @@ posix_putenv(PyObject *self, PyObject *args)
 
 #if defined(PYOS_OS2)
     }
+#endif
+#ifndef MS_WINDOWS
+       release_bytes(os1);
+       release_bytes(os2);
 #endif
        Py_INCREF(Py_None);
         return Py_None;
@@ -6688,6 +6796,7 @@ the underlying Win32 ShellExecute function doesn't work if it is.");
 static PyObject *
 win32_startfile(PyObject *self, PyObject *args)
 {
+       PyObject *ofilepath;
        char *filepath;
        char *operation = NULL;
        HINSTANCE rc;
@@ -6729,20 +6838,21 @@ win32_startfile(PyObject *self, PyObject *args)
 #endif
 
 normal:
-       if (!PyArg_ParseTuple(args, "et|s:startfile", 
-                             Py_FileSystemDefaultEncoding, &filepath, 
+       if (!PyArg_ParseTuple(args, "O&|s:startfile", 
+                             PyUnicode_FSConverter, &ofilepath, 
                              &operation))
                return NULL;
+       filepath = bytes2str(ofilepath, 1);
        Py_BEGIN_ALLOW_THREADS
        rc = ShellExecute((HWND)0, operation, filepath, 
                          NULL, NULL, SW_SHOWNORMAL);
        Py_END_ALLOW_THREADS
        if (rc <= (HINSTANCE)32) {
                PyObject *errval = win32_error("startfile", filepath);
-               PyMem_Free(filepath);
+               release_bytes(ofilepath);
                return errval;
        }
-       PyMem_Free(filepath);
+       release_bytes(ofilepath);
        Py_INCREF(Py_None);
        return Py_None;
 }
index f6da86f1d682c32b6b24279f4e7ab3b87ef4b6c4..4c0a55bb1faabe77c717c6a89eba4f70d71547b1 100644 (file)
@@ -14,6 +14,100 @@ wmain(int argc, wchar_t **argv)
        return Py_Main(argc, argv);
 }
 #else
+/* Convert the multi-byte string arg to a newly allocated wide string.
+   Bytes that cannot be decoded are escaped as 0xdc00 + byte (utf8b)
+   instead of making the conversion fail.
+   Returns a buffer obtained from PyMem_Malloc(), or NULL after printing
+   a message to stderr if memory runs out or mbrtowc() misbehaves. */
+static wchar_t*
+char2wchar(char* arg)
+{
+       wchar_t *res;
+#ifdef HAVE_BROKEN_MBSTOWCS
+       /* Some platforms have a broken implementation of
+        * mbstowcs which does not count the characters that
+        * would result from conversion.  Use an upper bound.
+        */
+       size_t argsize = strlen(arg);
+#else
+       size_t argsize = mbstowcs(NULL, arg, 0);
+#endif
+       size_t count;
+       unsigned char *in;
+       wchar_t *out;
+#ifdef HAVE_MBRTOWC
+       mbstate_t mbs;
+#endif
+       if (argsize != (size_t)-1) {
+               res = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
+               if (!res)
+                       goto oom;
+               count = mbstowcs(res, arg, argsize+1);
+               if (count != (size_t)-1)
+                       return res;
+               PyMem_Free(res);
+       }
+       /* Conversion failed. Fall back to escaping with utf8b. */
+#ifdef HAVE_MBRTOWC
+       /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */
+
+       /* Overallocate; as multi-byte characters are in the argument, the
+          actual output could use less memory. */
+       argsize = strlen(arg) + 1;
+       res = PyMem_Malloc(argsize*sizeof(wchar_t));
+       if (!res) goto oom;
+       in = (unsigned char*)arg;
+       out = res;
+       memset(&mbs, 0, sizeof mbs);
+       while (argsize) {
+               size_t converted = mbrtowc(out, (char*)in, argsize, &mbs);
+               if (converted == 0)
+                       /* Reached end of string; null char stored. */
+                       break;
+               if (converted == (size_t)-2) {
+                       /* Incomplete character. This should never happen,
+                          since we provide everything that we have -
+                          unless there is a bug in the C library, or I
+                          misunderstood how mbrtowc works. */
+                       fprintf(stderr, "unexpected mbrtowc result -2\n");
+                       /* Don't leak the partially filled buffer. */
+                       PyMem_Free(res);
+                       return NULL;
+               }
+               if (converted == (size_t)-1) {
+                       /* Conversion error. Escape as UTF-8b, and start over
+                          in the initial shift state. */
+                       *out++ = 0xdc00 + *in++;
+                       argsize--;
+                       memset(&mbs, 0, sizeof mbs);
+                       continue;
+               }
+               /* successfully converted some bytes */
+               in += converted;
+               argsize -= converted;
+               out++;
+       }
+#else
+       /* Cannot use C locale for escaping; manually escape as if charset
+          is ASCII (i.e. escape all bytes >= 128). This will still roundtrip
+          correctly in the locale's charset, which must be an ASCII superset. */
+       res = PyMem_Malloc((strlen(arg)+1)*sizeof(wchar_t));
+       if (!res) goto oom;
+       in = (unsigned char*)arg;
+       out = res;
+       while(*in)
+               if(*in < 128)
+                       *out++ = *in++;
+               else
+                       *out++ = 0xdc00 + *in++;
+       *out = 0;
+#endif
+       return res;
+oom:
+       fprintf(stderr, "out of memory\n");
+       return NULL;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -40,31 +127,9 @@ main(int argc, char **argv)
        oldloc = strdup(setlocale(LC_ALL, NULL));
        setlocale(LC_ALL, "");
        for (i = 0; i < argc; i++) {
-#ifdef HAVE_BROKEN_MBSTOWCS
-               /* Some platforms have a broken implementation of
-                * mbstowcs which does not count the characters that
-                * would result from conversion.  Use an upper bound.
-                */
-               size_t argsize = strlen(argv[i]);
-#else
-               size_t argsize = mbstowcs(NULL, argv[i], 0);
-#endif
-               size_t count;
-               if (argsize == (size_t)-1) {
-                       fprintf(stderr, "Could not convert argument %d to string\n", i);
+               argv_copy2[i] = argv_copy[i] = char2wchar(argv[i]);
+               if (!argv_copy[i])
                        return 1;
-               }
-               argv_copy[i] = (wchar_t *)PyMem_Malloc((argsize+1)*sizeof(wchar_t));
-               argv_copy2[i] = argv_copy[i];
-               if (!argv_copy[i]) {
-                       fprintf(stderr, "out of memory\n");
-                       return 1;
-               }
-               count = mbstowcs(argv_copy[i], argv[i], argsize+1);
-               if (count == (size_t)-1) {
-                       fprintf(stderr, "Could not convert argument %d to string\n", i);
-                       return 1;
-               }
        }
        setlocale(LC_ALL, oldloc);
        free(oldloc);
index 18b6fa26803daf0484e1dcb38739bb7ddedee0b5..218e70b0938dcc4b881be802a4af6ffdad779c5f 100644 (file)
@@ -1530,6 +1530,53 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
     }
 }
 
+/* Produce the file-system-encoded form of arg and store a new reference
+   to it in *(PyObject**)addr.
+
+   bytes and bytearray arguments are passed through unchanged (so the
+   stored object may be a bytearray).  Any other argument is converted
+   with PyUnicode_FromObject and then encoded with
+   Py_FileSystemDefaultEncoding using the "utf8b" error handler; the
+   encoder must return a bytes object.
+
+   Returns 1 on success.  Returns 0 with an exception set on failure, in
+   which case *addr is left untouched.  Data containing an embedded NUL
+   is rejected with TypeError.  The (object, address) -> int signature is
+   the shape of a converter for the "O&" argument-parsing format. */
+
+int
+PyUnicode_FSConverter(PyObject* arg, void* addr)
+{
+    PyObject *result;
+    const char *buffer;
+    Py_ssize_t length;
+
+    if (!PyBytes_Check(arg) && !PyByteArray_Check(arg)) {
+        /* Not a byte string: go through str and the file system encoding. */
+        PyObject *text = PyUnicode_FromObject(arg);
+        if (text == NULL)
+            return 0;
+        result = PyUnicode_AsEncodedObject(text,
+                                           Py_FileSystemDefaultEncoding,
+                                           "utf8b");
+        Py_DECREF(text);
+        if (result == NULL)
+            return 0;
+        if (!PyBytes_Check(result)) {
+            Py_DECREF(result);
+            PyErr_SetString(PyExc_TypeError, "encoder failed to return bytes");
+            return 0;
+        }
+    }
+    else {
+        /* Already bytes or bytearray: hand back the same object. */
+        result = arg;
+        Py_INCREF(result);
+    }
+
+    if (PyByteArray_Check(result) && !PyBytes_Check(result)) {
+        length = PyByteArray_GET_SIZE(result);
+        buffer = PyByteArray_AS_STRING(result);
+    }
+    else {
+        length = PyBytes_GET_SIZE(result);
+        buffer = PyBytes_AS_STRING(result);
+    }
+
+    /* strlen() stops at the first NUL, so any difference from the object's
+       own length means there is a NUL inside the data. */
+    if (strlen(buffer) != (size_t)length) {
+        PyErr_SetString(PyExc_TypeError, "embedded NUL character");
+        Py_DECREF(result);
+        return 0;
+    }
+
+    *(PyObject**)addr = result;
+    return 1;
+}
+
+
 char*
 _PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
 {
@@ -4154,11 +4201,22 @@ static PyObject *unicode_encode_ucs1(const Py_UNICODE *p,
                                                               collstart-startp, collend-startp, &newpos);
                 if (repunicode == NULL)
                     goto onError;
-                if (!PyUnicode_Check(repunicode)) {
-                    /* Implementation limitation: byte results not supported yet. */
-                    PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+                if (PyBytes_Check(repunicode)) {
+                    /* Directly copy bytes result to output. */
+                    repsize = PyBytes_Size(repunicode);
+                    if (repsize > 1) {
+                        /* Make room for all additional bytes. */
+                        if (_PyBytes_Resize(&res, ressize+repsize-1)) {
+                            Py_DECREF(repunicode);
+                            goto onError;
+                        }
+                        ressize += repsize-1;
+                    }
+                    memcpy(str, PyBytes_AsString(repunicode), repsize);
+                    str += repsize;
+                    p = startp + newpos;
                     Py_DECREF(repunicode);
-                    goto onError;
+                    break;
                 }
                 /* need more space? (at least enough for what we
                    have+the replacement+the rest of the string, so
@@ -5123,11 +5181,24 @@ int charmap_encoding_error(
                                                       collstartpos, collendpos, &newpos);
         if (repunicode == NULL)
             return -1;
-        if (!PyUnicode_Check(repunicode)) {
-            /* Implementation limitation: byte results not supported yet. */
-            PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
+        if (PyBytes_Check(repunicode)) {
+            /* Directly copy bytes result to output. */
+            Py_ssize_t outsize = PyBytes_Size(*res);
+            Py_ssize_t requiredsize;
+            repsize = PyBytes_Size(repunicode);
+            requiredsize = *respos + repsize;
+            if (requiredsize > outsize)
+                /* Make room for all additional bytes. */
+                if (charmapencode_resize(res, respos, requiredsize)) {
+                    Py_DECREF(repunicode);
+                    return -1;
+                }
+            memcpy(PyBytes_AsString(*res) + *respos,
+                   PyBytes_AsString(repunicode),  repsize);
+            *respos += repsize;
+            *inpos = newpos;
             Py_DECREF(repunicode);
-            return -1;
+            break;
         }
         /* generate replacement  */
         repsize = PyUnicode_GET_SIZE(repunicode);
@@ -5691,7 +5762,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
             if (repunicode == NULL)
                 goto onError;
             if (!PyUnicode_Check(repunicode)) {
-                /* Implementation limitation: byte results not supported yet. */
+                /* Byte results not supported, since they have no decimal property. */
                 PyErr_SetString(PyExc_TypeError, "error handler should return unicode");
                 Py_DECREF(repunicode);
                 goto onError;
index 633a24c6611ea2ba225535c86a8817f34576221f..7e3ff8a07f0a40016495f1c78235a00f8d7af548 100644 (file)
@@ -829,6 +829,82 @@ PyCodec_SurrogateErrors(PyObject *exc)
     }
 }
 
+static PyObject *
+PyCodec_UTF8bErrors(PyObject *exc)
+{
+    PyObject *restuple;
+    PyObject *object;
+    Py_ssize_t start;
+    Py_ssize_t end;
+    PyObject *res;
+    if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
+       Py_UNICODE *p;
+       Py_UNICODE *startp;
+       char *outp;
+       if (PyUnicodeEncodeError_GetStart(exc, &start))
+           return NULL;
+       if (PyUnicodeEncodeError_GetEnd(exc, &end))
+           return NULL;
+       if (!(object = PyUnicodeEncodeError_GetObject(exc)))
+           return NULL;
+       startp = PyUnicode_AS_UNICODE(object);
+       res = PyBytes_FromStringAndSize(NULL, end-start);
+       if (!res) {
+           Py_DECREF(object);
+           return NULL;
+       }
+       outp = PyBytes_AsString(res);
+       for (p = startp+start; p < startp+end; p++) {
+           Py_UNICODE ch = *p;
+           if (ch < 0xdc80 || ch > 0xdcff) {
+               /* Not a UTF-8b surrogate, fail with original exception */
+               PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+               Py_DECREF(res);
+               Py_DECREF(object);
+               return NULL;
+           }
+           *outp++ = ch - 0xdc00;
+       }
+       restuple = Py_BuildValue("(On)", res, end);
+       Py_DECREF(res);
+       Py_DECREF(object);
+       return restuple;
+    }
+    else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
+       unsigned char *p;
+       Py_UNICODE ch[4]; /* decode up to 4 bad bytes. */
+       int consumed = 0;
+       if (PyUnicodeDecodeError_GetStart(exc, &start))
+           return NULL;
+       if (PyUnicodeDecodeError_GetEnd(exc, &end))
+           return NULL;
+       if (!(object = PyUnicodeDecodeError_GetObject(exc)))
+           return NULL;
+       if (!(p = (unsigned char*)PyBytes_AsString(object))) {
+           Py_DECREF(object);
+           return NULL;
+       }
+       while (consumed < 4 && consumed < end-start) {
+           /* Refuse to escape ASCII bytes. */
+           if (p[start+consumed] < 128)
+               break;
+           ch[consumed] = 0xdc00 + p[start+consumed];
+           consumed++;
+       }
+       Py_DECREF(object);
+       if (!consumed) {
+           /* codec complained about ASCII byte. */
+           PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
+           return NULL;
+       }           
+       return Py_BuildValue("(u#n)", ch, consumed, start+consumed);
+    }
+    else {
+       wrong_exception_type(exc);
+       return NULL;
+    }
+}
+
        
 static PyObject *strict_errors(PyObject *self, PyObject *exc)
 {
@@ -864,6 +940,11 @@ static PyObject *surrogates_errors(PyObject *self, PyObject *exc)
     return PyCodec_SurrogateErrors(exc);
 }
 
+static PyObject *utf8b_errors(PyObject *self, PyObject *exc)
+{
+    return PyCodec_UTF8bErrors(exc);
+}
+
 static int _PyCodecRegistry_Init(void)
 {
     static struct {
@@ -918,6 +999,14 @@ static int _PyCodecRegistry_Init(void)
                surrogates_errors,
                METH_O
            }
+       },
+       {
+           "utf8b",
+           {
+               "utf8b",
+               utf8b_errors,
+               METH_O
+           }
        }
     };
 
index f93403ba5a27b55e7d42f24a1ca751464bb27ff8..c75f55fe9a76953cf0ac1232af0fa2721fb18434 100644 (file)
@@ -262,6 +262,22 @@ Py_InitializeEx(int install_sigs)
 
        _PyImportHooks_Init();
 
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+       /* On Unix, set the file system encoding according to the
+          user's preference, if the CODESET names a well-known
+          Python codec, and Py_FileSystemDefaultEncoding isn't
+          initialized by other means. Also set the encoding of
+          stdin and stdout if these are terminals.  */
+
+       codeset = get_codeset();
+       if (codeset) {
+               if (!Py_FileSystemDefaultEncoding)
+                       Py_FileSystemDefaultEncoding = codeset;
+               else
+                       free(codeset);
+       }
+#endif
+
        if (install_sigs)
                initsigs(); /* Signal handling stuff, including initintr() */
                
@@ -285,22 +301,6 @@ Py_InitializeEx(int install_sigs)
 #ifdef WITH_THREAD
        _PyGILState_Init(interp, tstate);
 #endif /* WITH_THREAD */
-
-#if defined(HAVE_LANGINFO_H) && defined(CODESET)
-       /* On Unix, set the file system encoding according to the
-          user's preference, if the CODESET names a well-known
-          Python codec, and Py_FileSystemDefaultEncoding isn't
-          initialized by other means. Also set the encoding of
-          stdin and stdout if these are terminals.  */
-
-       codeset = get_codeset();
-       if (codeset) {
-               if (!Py_FileSystemDefaultEncoding)
-                       Py_FileSystemDefaultEncoding = codeset;
-               else
-                       free(codeset);
-       }
-#endif
 }
 
 void
index d1da2856fefbc2f23b09bbe9efee55c06d73d7de..cdc9515ea550c5240c8fbf587b8af9696fdd10c3 100755 (executable)
--- a/configure
+++ b/configure
@@ -1,5 +1,5 @@
 #! /bin/sh
-# From configure.in Revision: 71731 .
+# From configure.in Revision: 72144 .
 # Guess values for system-dependent variables and create Makefiles.
 # Generated by GNU Autoconf 2.61 for python 3.1.
 #
@@ -16297,13 +16297,14 @@ echo "${ECHO_T}MACHDEP_OBJS" >&6; }
 
 
 
+
 
 
 for ac_func in alarm setitimer getitimer bind_textdomain_codeset chown \
  clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
  gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
  getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
  mremap nice pathconf pause plock poll pthread_init \
  putenv readlink realpath \
  select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
index 6a1e231131f3ec59d76b1d20ea0c6f4cf4cde512..ba43b2121ef5e38d1821be69bb096ebe0f81af7a 100644 (file)
@@ -2403,7 +2403,7 @@ AC_CHECK_FUNCS(alarm setitimer getitimer bind_textdomain_codeset chown \
  clock confstr ctermid execv fchmod fchown fork fpathconf ftime ftruncate \
  gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
  getpriority getpwent getspnam getspent getsid getwd \
- kill killpg lchmod lchown lstat mkfifo mknod mktime \
+ kill killpg lchmod lchown lstat mbrtowc mkfifo mknod mktime \
  mremap nice pathconf pause plock poll pthread_init \
  putenv readlink realpath \
  select sem_open sem_timedwait sem_getvalue sem_unlink setegid seteuid \
index 01bc23575f43cc4830a3300928c4fad0fe9bda27..4c779000c814bd9c11f3ae567bf93d22b733aec7 100644 (file)
 /* Define this if you have the makedev macro. */
 #undef HAVE_MAKEDEV
 
+/* Define to 1 if you have the `mbrtowc' function. */
+#undef HAVE_MBRTOWC
+
 /* Define to 1 if you have the `memmove' function. */
 #undef HAVE_MEMMOVE