]> granicus.if.org Git - python/commitdiff
#2798: PyArg_ParseTuple did not correctly handle the "s" code in case of unicode...
author Amaury Forgeot d'Arc <amauryfa@gmail.com>
Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
committer Amaury Forgeot d'Arc <amauryfa@gmail.com>
Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
with chars outside the 7bit ascii (s# was already correct).

This is necessary to allow Python to run from a non-ASCII directory,
and seems to be sufficient on some platforms, probably those where the default
PyUnicode encoding (utf-8) is also the default filesystem encoding.

Misc/NEWS
Modules/_testcapimodule.c
Python/getargs.c

index 0af5a1033bc30f587b8a78776d2488da0952c920..e40844e726276cae147c5d0bf34c104ff724088c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
 Core and Builtins
 -----------------
 
+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+  allows any unicode string and returns a utf-8 encoded buffer, just like the
+  "s#" code already does.  The "z" code was corrected as well.
+
 - Issue 2801: fix bug in the float.is_integer method where a ValueError
   was sometimes incorrectly raised.
 
index cdee97557246bd017115d2d2deaecd11e3684d0e..2e68a6693161e2afa02de57831aa7ef6de569d79 100644 (file)
@@ -475,6 +475,38 @@ test_k_code(PyObject *self)
 }
 
 
+/* Test the s and z codes for PyArg_ParseTuple: both must accept a unicode
+   string containing non-ASCII characters.  Returns None, or NULL on error. */
+static PyObject *
+test_s_code(PyObject *self)
+{
+    PyObject *tuple, *obj;
+    char *value;
+    int ok;
+
+    tuple = PyTuple_New(1);
+    if (tuple == NULL)
+        return NULL;
+
+    obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
+                           "latin-1", NULL);
+    if (obj == NULL) {
+        Py_DECREF(tuple);   /* don't leak the tuple */
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 0, obj);   /* steals the reference to obj */
+
+    /* Both calls used to raise a TypeError: "argument must be string
+     * without null bytes, not str".  PyArg_ParseTuple returns 0 (never a
+     * negative value) on failure, so test for falsity. */
+    ok = PyArg_ParseTuple(tuple, "s:test_s_code1", &value) &&
+         PyArg_ParseTuple(tuple, "z:test_s_code2", &value);
+    Py_DECREF(tuple);
+    if (!ok)
+        return NULL;
+    Py_RETURN_NONE;
+}
+
 /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
    of an error.
 */
@@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
        {"codec_incrementaldecoder",
         (PyCFunction)codec_incrementaldecoder,  METH_VARARGS},
 #endif
+       {"test_s_code",         (PyCFunction)test_s_code,        METH_NOARGS},
        {"test_u_code",         (PyCFunction)test_u_code,        METH_NOARGS},
        {"test_Z_code",         (PyCFunction)test_Z_code,        METH_NOARGS},
 #ifdef WITH_THREAD
index 2bbafdb9db55a1aab2db8d71e6d18c07498ce6a8..427a951274f8d73c0ab6d4e6adc55f4d455b3a41 100644 (file)
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                        }
                        else
                                return converterr("string", arg, msgbuf, bufsize);
-                       /* XXX(gb): this test is completely wrong -- p is a
-                        * byte string while arg is a Unicode. I *think* it should
-                        * check against the size of uarg... */
-                       if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+                       if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
                                return converterr("string without null bytes",
                                                  arg, msgbuf, bufsize);
                }
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                        format++;
                } else {
                        char **p = va_arg(*p_va, char **);
+                       uarg = NULL;
 
                        if (arg == Py_None)
                                *p = 0;
-                       else if (PyString_Check(arg))
+                       else if (PyString_Check(arg)) {
+                               /* Enable null byte check below */
+                               uarg = arg;
                                *p = PyString_AS_STRING(arg);
+                       }
                        else if (PyUnicode_Check(arg)) {
                                uarg = UNICODE_DEFAULT_ENCODING(arg);
                                if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                                }
                                format++;
                        }
-                       /* XXX(gb): same comment as for 's' applies here... */
-                       else if (*p != NULL &&
-                                (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+                       else if (*p != NULL && uarg != NULL &&
+                               (Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
                                return converterr(
                                        "string without null bytes or None",
                                        arg, msgbuf, bufsize);