granicus.if.org Git - python/commitdiff
Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
author Martin v. Löwis <martin@v.loewis.de>
Fri, 29 May 2009 16:22:26 +0000 (16:22 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Fri, 29 May 2009 16:22:26 +0000 (16:22 +0000)
of the command line.

Misc/NEWS
Modules/python.c

index b98a36884846589bd79ca7945786645cffffc186..1e1277327a2fe7a138c27f2f356e787d6693fec8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 3.1 release candidate 1?
 Core and Builtins
 -----------------
 
+- Issue #6097: Escape UTF-8 surrogates resulting from mbstowcs conversion
+  of the command line.
+
 - Issue #6012: Add cleanup support to O& argument parsing.
 
 - Issue #6089: Fixed str.format with certain invalid field specifiers
index 13c6d5b82a42dd28998566bf92d709ae59967c5e..edd33f433aa540dd1d15bd6ab58fc6953a994ffc 100644 (file)
@@ -38,8 +38,16 @@ char2wchar(char* arg)
                if (!res)
                        goto oom;
                count = mbstowcs(res, arg, argsize+1);
-               if (count != (size_t)-1)
-                       return res;
+               if (count != (size_t)-1) {
+                       wchar_t *tmp;
+                       /* Only use the result if it contains no
+                          surrogate characters. */
+                       for (tmp = res; *tmp != 0 &&
+                                    (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
+                               ;
+                       if (*tmp == 0)
+                               return res;
+               }
                PyMem_Free(res);
        }
        /* Conversion failed. Fall back to escaping with surrogateescape. */
@@ -75,6 +83,14 @@ char2wchar(char* arg)
                        memset(&mbs, 0, sizeof mbs);
                        continue;
                }
+               if (*out >= 0xd800 && *out <= 0xdfff) {
+                       /* Surrogate character.  Escape the original
+                          byte sequence with surrogateescape. */
+                       argsize -= converted;
+                       while (converted--)
+                               *out++ = 0xdc00 + *in++;
+                       continue;
+               }
                /* successfully converted some bytes */
                in += converted;
                argsize -= converted;