(Merge 3.2) Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()
author: Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:50:07 +0000 (01:50 +0100)
committer: Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:50:07 +0000 (01:50 +0100)
Lib/test/test_unicode.py
Objects/unicodeobject.c

index c63364aff8f3586d8960766859ba60f41522a9df..b20f8781490ca2fa538fb204edd93f8b2ea9a99d 100644 (file)
@@ -1824,6 +1824,12 @@ class UnicodeTest(string_tests.CommonTest,
                          b'123&#8364;')
         self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
                          b'123\\u20ac')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
+                         b'123? ')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
+                         b'123??')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
+                         b'123?0')
 
     def test_transform_decimal(self):
         from _testcapi import unicode_transformdecimaltoascii as transform_decimal
index 7f079e789972aa9d47561c52d4db0d76d6477cd5..16db801b5c3a8a5fcc8618d6828c798b7709d294 100644 (file)
@@ -8875,22 +8875,25 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
     kind = PyUnicode_KIND(unicode);
     data = PyUnicode_DATA(unicode);
 
-    for (i=0; i < length; i++) {
+    for (i=0; i < length; ) {
         Py_UCS4 ch = PyUnicode_READ(kind, data, i);
         int decimal;
         Py_ssize_t startpos, endpos;
 
         if (Py_UNICODE_ISSPACE(ch)) {
             *output++ = ' ';
+            i++;
             continue;
         }
         decimal = Py_UNICODE_TODECIMAL(ch);
         if (decimal >= 0) {
             *output++ = '0' + decimal;
+            i++;
             continue;
         }
         if (0 < ch && ch < 256) {
             *output++ = (char)ch;
+            i++;
             continue;
         }
         /* All other characters are considered unencodable */
@@ -8899,8 +8902,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
         for (; endpos < length; endpos++) {
             ch = PyUnicode_READ(kind, data, endpos);
             if ((0 < ch && ch < 256) ||
-                !Py_UNICODE_ISSPACE(ch) ||
-                Py_UNICODE_TODECIMAL(ch))
+                Py_UNICODE_ISSPACE(ch) ||
+                0 <= Py_UNICODE_TODECIMAL(ch))
                 break;
         }
         /* cache callback name lookup
@@ -8924,7 +8927,8 @@ PyUnicode_EncodeDecimal(Py_UNICODE *s,
         case 2: /* replace */
             for (j=startpos; j < endpos; j++)
                 *output++ = '?';
-            /* fall through */
+            i = endpos;
+            break;
         case 3: /* ignore */
             i = endpos;
             break;