granicus.if.org Git - python/commitdiff
Issue #13093: Fix error handling on PyUnicode_EncodeDecimal()
author: Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:45:37 +0000 (01:45 +0100)
committer: Victor Stinner <victor.stinner@haypocalc.com>
Tue, 22 Nov 2011 00:45:37 +0000 (01:45 +0100)
 * Add tests for PyUnicode_EncodeDecimal() and PyUnicode_TransformDecimalToASCII()
 * Remove the unused "e" variable in replace()

Lib/test/test_unicode.py
Modules/_testcapimodule.c
Objects/unicodeobject.c

index 591a297756be114163527d9fbda29c24d570294d..259a181b3bd5a9baf34250e9ea2c0671e575e319 100644 (file)
@@ -1767,6 +1767,42 @@ class StringModuleTest(unittest.TestCase):
             ]])
         self.assertRaises(TypeError, _string.formatter_field_name_split, 1)
 
+    def test_encode_decimal(self):
+        from _testcapi import unicode_encodedecimal
+        self.assertEqual(unicode_encodedecimal('123'),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal('\u0663.\u0661\u0664'),
+                         b'3.14')
+        self.assertEqual(unicode_encodedecimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+                         b' 3.14 ')
+        self.assertRaises(UnicodeEncodeError,
+                          unicode_encodedecimal, "123\u20ac", "strict")
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "replace"),
+                         b'123?')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "ignore"),
+                         b'123')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "xmlcharrefreplace"),
+                         b'123&#8364;')
+        self.assertEqual(unicode_encodedecimal("123\u20ac", "backslashreplace"),
+                         b'123\\u20ac')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\N{EM SPACE}", "replace"),
+                         b'123? ')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u20ac", "replace"),
+                         b'123??')
+        self.assertEqual(unicode_encodedecimal("123\u20ac\u0660", "replace"),
+                         b'123?0')
+
+    def test_transform_decimal(self):
+        from _testcapi import unicode_transformdecimaltoascii as transform_decimal
+        self.assertEqual(transform_decimal('123'),
+                         '123')
+        self.assertEqual(transform_decimal('\u0663.\u0661\u0664'),
+                         '3.14')
+        self.assertEqual(transform_decimal("\N{EM SPACE}3.14\N{EN SPACE}"),
+                         "\N{EM SPACE}3.14\N{EN SPACE}")
+        self.assertEqual(transform_decimal('123\u20ac'),
+                         '123\u20ac')
+
 
 def test_main():
     support.run_unittest(__name__)
index f19d0df5944bc853b3da24d953a7a7646f1a3719..6c61f7db8e4074b3e9751e77fd16fe50ef321413 100644 (file)
@@ -1437,6 +1437,51 @@ unicode_aswidecharstring(PyObject *self, PyObject *args)
     return Py_BuildValue("(Nn)", result, size);
 }
 
+/* Test helper wrapping PyUnicode_EncodeDecimal().
+
+   args: a str, plus an optional error handler name (passed through as
+   NULL when omitted).
+   Returns a new bytes object holding the encoded text, or NULL with an
+   exception set on failure. */
+static PyObject *
+unicode_encodedecimal(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    char *errors = NULL;
+    PyObject *decimal;
+    Py_ssize_t decimal_length, new_length;
+    int res;
+
+    if (!PyArg_ParseTuple(args, "u#|s", &unicode, &length, &errors))
+        return NULL;
+
+    /* PyUnicode_EncodeDecimal() is given no output size, so the buffer
+       must be large enough up front.  The longest replacement the
+       built-in handlers emit for one character is 10 bytes
+       ("&#1114111;" or "\U0010ffff"); the previous factor of 7
+       (len("&#8364;")) is too small for non-BMP characters when
+       Py_UNICODE is 32 bits wide.
+       NOTE(review): a custom handler registered by the caller could still
+       return a longer replacement -- confirm whether that matters for
+       this test helper. */
+    if (length > PY_SSIZE_T_MAX / 10)
+        return PyErr_NoMemory();
+    decimal_length = length * 10;
+    decimal = PyBytes_FromStringAndSize(NULL, decimal_length);
+    if (decimal == NULL)
+        return NULL;
+
+    res = PyUnicode_EncodeDecimal(unicode, length,
+                                  PyBytes_AS_STRING(decimal),
+                                  errors);
+    if (res < 0) {
+        Py_DECREF(decimal);
+        return NULL;
+    }
+
+    /* The real output length is recovered with strlen(), which relies on
+       the encoder having NUL-terminated what it wrote. */
+    new_length = strlen(PyBytes_AS_STRING(decimal));
+    assert(new_length <= decimal_length);
+    /* No Py_DECREF on failure: _PyBytes_Resize() is documented to release
+       the object and set the pointer to NULL itself. */
+    res = _PyBytes_Resize(&decimal, new_length);
+    if (res < 0)
+        return NULL;
+
+    return decimal;
+}
+
+/* Test helper wrapping PyUnicode_TransformDecimalToASCII().
+
+   args: a single str.
+   Returns whatever PyUnicode_TransformDecimalToASCII() returns for that
+   buffer, or NULL with an exception set if argument parsing fails. */
+static PyObject *
+unicode_transformdecimaltoascii(PyObject *self, PyObject *args)
+{
+    Py_UNICODE *unicode;
+    Py_ssize_t length;
+    /* The format used to be "u#|s", but no destination was supplied for
+       the optional "s" unit, so a call with two arguments made
+       PyArg_ParseTuple() store through an indeterminate pointer.  The
+       wrapped function takes no error handler, so only the string is
+       accepted. */
+    if (!PyArg_ParseTuple(args, "u#", &unicode, &length))
+        return NULL;
+    return PyUnicode_TransformDecimalToASCII(unicode, length);
+}
+
 static PyObject *
 getargs_w_star(PyObject *self, PyObject *args)
 {
@@ -2320,8 +2365,10 @@ static PyMethodDef TestMethods[] = {
     {"test_u_code",             (PyCFunction)test_u_code,        METH_NOARGS},
     {"test_Z_code",             (PyCFunction)test_Z_code,        METH_NOARGS},
     {"test_widechar",           (PyCFunction)test_widechar,      METH_NOARGS},
-    {"unicode_aswidechar",      unicode_aswidechar,                 METH_VARARGS},
-    {"unicode_aswidecharstring",unicode_aswidecharstring,           METH_VARARGS},
+    {"unicode_aswidechar",      unicode_aswidechar,              METH_VARARGS},
+    {"unicode_aswidecharstring",unicode_aswidecharstring,        METH_VARARGS},
+    {"unicode_encodedecimal",   unicode_encodedecimal,           METH_VARARGS},
+    {"unicode_transformdecimaltoascii", unicode_transformdecimaltoascii, METH_VARARGS},
 #ifdef WITH_THREAD
     {"_test_thread_state",      test_thread_state,               METH_VARARGS},
     {"_pending_threadfunc",     pending_threadfunc,              METH_VARARGS},
index 8680726275e7cb2334ff12ddef63f1f499ac5e4d..d13c5470b35db90bf3ec4cbf59c2b4bf7b59cbfd 100644 (file)
@@ -6323,11 +6323,10 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s,
         }
         /* All other characters are considered unencodable */
         collstart = p;
-        collend = p+1;
-        while (collend < end) {
+        for (collend = p+1; collend < end; collend++) {
             if ((0 < *collend && *collend < 256) ||
-                !Py_UNICODE_ISSPACE(*collend) ||
-                Py_UNICODE_TODECIMAL(*collend))
+                Py_UNICODE_ISSPACE(*collend) ||
+                0 <= Py_UNICODE_TODECIMAL(*collend))
                 break;
         }
         /* cache callback name lookup
@@ -7004,7 +7003,7 @@ PyObject *replace(PyUnicodeObject *self,
         }
     } else {
 
-        Py_ssize_t n, i, j, e;
+        Py_ssize_t n, i, j;
         Py_ssize_t product, new_size, delta;
         Py_UNICODE *p;
 
@@ -7036,7 +7035,6 @@ PyObject *replace(PyUnicodeObject *self,
             return NULL;
         i = 0;
         p = u->str;
-        e = self->length - str1->length;
         if (str1->length > 0) {
             while (n-- > 0) {
                 /* look for next match */