granicus.if.org Git - python/commitdiff
Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()
author Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:54:19 +0000 (01:54 +0100)
committer Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:54:19 +0000 (01:54 +0100)
Add tests for PyUnicode_EncodeDecimal()

Lib/test/test_unicode.py
Modules/_testcapimodule.c
Objects/unicodeobject.c

index 5c1858c9b40b45bc02d6db952acad0a66a549c3a..fda44da8888e51422f78acdd6db230c8ac1b817e 100644 (file)
@@ -1606,6 +1606,31 @@ class UnicodeTest(
         self.assertEqual("%s" % u, u'__unicode__ overridden')
         self.assertEqual("{}".format(u), '__unicode__ overridden')
 
+    def test_encode_decimal(self):
+        from _testcapi import unicode_encodedecimal
+        self.assertEqual(unicode_encodedecimal(u'123'),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal(u'\u0663.\u0661\u0664'),
+                         b'3.14')
+        self.assertEqual(unicode_encodedecimal(u"\N{EM SPACE}3.14\N{EN SPACE}"),
+                         b' 3.14 ')
+        self.assertRaises(UnicodeEncodeError,
+                          unicode_encodedecimal, u"123\u20ac", "strict")
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "replace"),
+                         b'123?')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "ignore"),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "xmlcharrefreplace"),
+                         b'123&#8364;')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac", "backslashreplace"),
+                         b'123\\u20ac')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac\N{EM SPACE}", "replace"),
+                         b'123? ')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac\u20ac", "replace"),
+                         b'123??')
+        self.assertEqual(unicode_encodedecimal(u"123\u20ac\u0660", "replace"),
+                         b'123?0')
+
 
 def test_main():
     test_support.run_unittest(__name__)
index 9c45274a70487e96df14ee909b712e1cddecc960..bb1cc83830ecea0567a0bba1df1f622208e60af3 100644 (file)
@@ -1105,6 +1105,41 @@ test_widechar(PyObject *self)
     Py_RETURN_NONE;
 }
 
+/* Test helper exposing PyUnicode_EncodeDecimal() to Python code.
+ *
+ * args: a unicode string and an optional error handler name; when the
+ *       handler is omitted, NULL is passed to PyUnicode_EncodeDecimal().
+ *
+ * Returns a new bytes object holding the encoded text, or NULL with an
+ * exception set on failure.
+ */
+static PyObject *
+unicode_encodedecimal(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    char *errors = NULL;
+    PyObject *decimal;
+    Py_ssize_t decimal_length, new_length;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors))
+        return NULL;
+
+    /* PyUnicode_EncodeDecimal() receives only an output pointer, not a
+       size, so the buffer must be large enough up front.  Reserve 10 bytes
+       per code unit: the longest replacements produced by the built-in
+       handlers are "&#1114111;" (xmlcharrefreplace) and "\U0010ffff"
+       (backslashreplace).  A factor of 7 (len("&#8364;")) is too small for
+       code points >= 10000, e.g. u"\u4e2d" becomes "&#20013;" (8 bytes).
+       NOTE(review): a custom handler registered with
+       codecs.register_error() could presumably return a longer
+       replacement; this helper does not guard against that. */
+    if (length > PY_SSIZE_T_MAX / 10)
+        return PyErr_NoMemory();
+    decimal_length = length * 10;
+    decimal = PyBytes_FromStringAndSize(NULL, decimal_length);
+    if (decimal == NULL)
+        return NULL;
+
+    res = PyUnicode_EncodeDecimal(unicode, length,
+                                  PyBytes_AS_STRING(decimal),
+                                  errors);
+    if (res < 0) {
+        Py_DECREF(decimal);
+        return NULL;
+    }
+
+    /* The encoded length is found with strlen(), i.e. the encoder is
+       relied upon to NUL-terminate what it wrote. */
+    new_length = (Py_ssize_t)strlen(PyBytes_AS_STRING(decimal));
+    assert(new_length <= decimal_length);
+    /* Shrink to the bytes actually written.  On failure _PyBytes_Resize()
+       releases the object and sets decimal to NULL, so no DECREF here. */
+    res = _PyBytes_Resize(&decimal, new_length);
+    if (res < 0)
+        return NULL;
+
+    return decimal;
+}
+
 static PyObject *
 test_empty_argparse(PyObject *self)
 {
@@ -1698,6 +1733,7 @@ static PyMethodDef TestMethods[] = {
 #ifdef Py_USING_UNICODE
     {"test_u_code",             (PyCFunction)test_u_code,        METH_NOARGS},
     {"test_widechar",           (PyCFunction)test_widechar,      METH_NOARGS},
+    {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},
 #endif
 #ifdef WITH_THREAD
     {"_test_thread_state",  test_thread_state,                   METH_VARARGS},
index 5ce879d4409225c10b8bbac523fb1b7257ce945c..8225e8267851068388096e6bed487fdc5d32738a 100644 (file)
@@ -5160,11 +5160,10 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
         }
         /* All other characters are considered unencodable */
         collstart = p;
-        collend = p+1;
-        while (collend < end) {
+        for (collend = p+1; collend < end; collend++) {
             if ((0 < *collend && *collend < 256) ||
-                !Py_UNICODE_ISSPACE(*collend) ||
-                Py_UNICODE_TODECIMAL(*collend))
+                Py_UNICODE_ISSPACE(*collend) ||
+                0 <= Py_UNICODE_TODECIMAL(*collend))
                 break;
         }
         /* cache callback name lookup