granicus.if.org Git - python/commitdiff
Merged revisions 80031 via svnmerge from
author Victor Stinner <victor.stinner@haypocalc.com>
Tue, 13 Apr 2010 11:09:22 +0000 (11:09 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Tue, 13 Apr 2010 11:09:22 +0000 (11:09 +0000)
svn+ssh://pythondev@svn.python.org/python/branches/py3k

........
  r80031 | victor.stinner | 2010-04-13 13:07:24 +0200 (mar., 13 avril 2010) | 4 lines

  Issue #8383: pickle and pickletools use surrogatepass error handler when
  encoding unicode as utf8 to support lone surrogates and stay compatible with
  Python 2.x and 3.0
........

Lib/pickle.py
Lib/pickletools.py
Lib/test/pickletester.py
Misc/NEWS
Modules/_pickle.c

index 7af4ce969a48b69b4bd2dbfd7b1a2e48f45d34d2..7b48527d3a1f5104ae365e15c185e63ebd9eaa78 100644 (file)
@@ -499,7 +499,7 @@ class _Pickler:
 
     def save_str(self, obj, pack=struct.pack):
         if self.bin:
-            encoded = obj.encode('utf-8')
+            encoded = obj.encode('utf-8', 'surrogatepass')
             n = len(encoded)
             self.write(BINUNICODE + pack("<i", n) + encoded)
         else:
@@ -966,7 +966,7 @@ class _Unpickler:
 
     def load_binunicode(self):
         len = mloads(b'i' + self.read(4))
-        self.append(str(self.read(len), 'utf-8'))
+        self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
     dispatch[BINUNICODE[0]] = load_binunicode
 
     def load_short_binstring(self):
index ca11aa3871852b5f5512ae61f884e0677bbdf73c..6ab75c7ab77bcf28dc85fee242b9f7cc4a091e98 100644 (file)
@@ -469,7 +469,7 @@ def read_unicodestring4(f):
         raise ValueError("unicodestring4 byte count < 0: %d" % n)
     data = f.read(n)
     if len(data) == n:
-        return str(data, 'utf-8')
+        return str(data, 'utf-8', 'surrogatepass')
     raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                      "remain" % (n, len(data)))
 
index 7ecc1053d73c0c2162d626dd0b684eb4b40c2aa4..30ff4ef29992ce005daf0cc0c9690431f6008219 100644 (file)
@@ -515,7 +515,9 @@ class AbstractPickleTests(unittest.TestCase):
 
     def test_unicode(self):
         endcases = ['', '<\\u>', '<\\\u1234>', '<\n>',
-                    '<\\>', '<\\\U00012345>']
+                    '<\\>', '<\\\U00012345>',
+                    # surrogates
+                    '<\udc80>']
         for proto in protocols:
             for u in endcases:
                 p = self.dumps(u, proto)
index b829138f39268e8b876d7cf08e177805f1cced61..d72ce12aaf97160b9a012d68dac90faa1252dcaa 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -28,6 +28,10 @@ Core and Builtins
 Library
 -------
 
+- Issue #8383: pickle and pickletools use surrogatepass error handler when
+  encoding unicode as utf8 to support lone surrogates and stay compatible with
+  Python 2.x and 3.0
+
 - Issue #8179: Fix macpath.realpath() on a non-existing path.
 
 - Issue #8139: ossaudiodev didn't initialize its types properly, therefore
index 29aed7adb3b8d8ea4fbefeb1900d094038d6d180..0e1c2cdc8828af1c791749ae95c305171d336787 100644 (file)
@@ -1227,7 +1227,9 @@ save_unicode(PicklerObject *self, PyObject *obj)
     if (self->bin) {
         char pdata[5];
 
-        encoded = PyUnicode_AsUTF8String(obj);
+        encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
+                                    PyUnicode_GET_SIZE(obj),
+                                    "surrogatepass");
         if (encoded == NULL)
             goto error;
 
@@ -3352,7 +3354,7 @@ load_binunicode(UnpicklerObject *self)
     if (unpickler_read(self, &s, size) < 0)
         return -1;
 
-    str = PyUnicode_DecodeUTF8(s, size, NULL);
+    str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
     if (str == NULL)
         return -1;