#2798: PyArg_ParseTuple did not correctly handle the "s" code in case of unicode...

author Amaury Forgeot d'Arc <amauryfa@gmail.com>

Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)

committer Amaury Forgeot d'Arc <amauryfa@gmail.com>

Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
author Amaury Forgeot d'Arc <amauryfa@gmail.com>
Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
committer Amaury Forgeot d'Arc <amauryfa@gmail.com>
Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
diff --git a/Misc/NEWS b/Misc/NEWS

index 0af5a1033bc30f587b8a78776d2488da0952c920..e40844e726276cae147c5d0bf34c104ff724088c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,10 @@ What's new in Python 3.0b1?
  Core and Builtins
  -----------------
  
+- Issue 2798: When parsing arguments with PyArg_ParseTuple, the "s" code now
+  allows any unicode string and returns a utf-8 encoded buffer, just like the
+  "s#" code already does.  The "z" code was corrected as well.
+
  - Issue 2801: fix bug in the float.is_integer method where a ValueError
    was sometimes incorrectly raised.
  
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c

index cdee97557246bd017115d2d2deaecd11e3684d0e..2e68a6693161e2afa02de57831aa7ef6de569d79 100644 (file)
--- a/Modules/_testcapimodule.c
+++ b/Modules/_testcapimodule.c
@@ -475,6 +475,38 @@ test_k_code(PyObject *self)
  }
  
  
+/* Test the s and z codes for PyArg_ParseTuple: both must accept a unicode
+   string with non-ASCII characters.  Returns None, or NULL on failure. */
+static PyObject *
+test_s_code(PyObject *self)
+{
+    /* Unicode strings should be accepted */
+    PyObject *tuple, *obj;
+    char *value;
+
+    tuple = PyTuple_New(1);
+    if (tuple == NULL)
+        return NULL;
+
+    obj = PyUnicode_Decode("t\xeate", strlen("t\xeate"),
+                           "latin-1", NULL);
+    if (obj == NULL) {
+        Py_DECREF(tuple);   /* do not leak the tuple on decode failure */
+        return NULL;
+    }
+    PyTuple_SET_ITEM(tuple, 0, obj);   /* steals the reference to obj */
+
+    /* Both calls used to raise a TypeError: "argument must be string
+     * without null bytes, not str".  PyArg_ParseTuple returns 0 on error. */
+    if (!PyArg_ParseTuple(tuple, "s:test_s_code1", &value) ||
+        !PyArg_ParseTuple(tuple, "z:test_s_code2", &value)) {
+        Py_DECREF(tuple);
+        return NULL;
+    }
+    Py_DECREF(tuple);
+    Py_RETURN_NONE;
+}
+
  /* Test the u and u# codes for PyArg_ParseTuple. May leak memory in case
     of an error.
  */
@@ -952,6 +984,7 @@ static PyMethodDef TestMethods[] = {
         {"codec_incrementaldecoder",
          (PyCFunction)codec_incrementaldecoder,  METH_VARARGS},
  #endif
+       {"test_s_code",         (PyCFunction)test_s_code,        METH_NOARGS},
         {"test_u_code",         (PyCFunction)test_u_code,        METH_NOARGS},
         {"test_Z_code",         (PyCFunction)test_Z_code,        METH_NOARGS},
  #ifdef WITH_THREAD
diff --git a/Python/getargs.c b/Python/getargs.c

index 2bbafdb9db55a1aab2db8d71e6d18c07498ce6a8..427a951274f8d73c0ab6d4e6adc55f4d455b3a41 100644 (file)
--- a/Python/getargs.c
+++ b/Python/getargs.c
@@ -822,10 +822,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                         }
                         else
                                 return converterr("string", arg, msgbuf, bufsize);
-                       /* XXX(gb): this test is completely wrong -- p is a
-                        * byte string while arg is a Unicode. I *think* it should
-                        * check against the size of uarg... */
-                       if ((Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+                       if ((Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
                                 return converterr("string without null bytes",
                                                   arg, msgbuf, bufsize);
                 }
@@ -874,11 +871,15 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                         format++;
                 } else {
                         char **p = va_arg(*p_va, char **);
+                       uarg = NULL;
  
                         if (arg == Py_None)
                                 *p = 0;
-                       else if (PyString_Check(arg))
+                       else if (PyString_Check(arg)) {
+                               /* Enable null byte check below */
+                               uarg = arg;
                                 *p = PyString_AS_STRING(arg);
+                       }
                         else if (PyUnicode_Check(arg)) {
                                 uarg = UNICODE_DEFAULT_ENCODING(arg);
                                 if (uarg == NULL)
@@ -900,9 +901,8 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
                                 }
                                 format++;
                         }
-                       /* XXX(gb): same comment as for 's' applies here... */
-                       else if (*p != NULL &&
-                                (Py_ssize_t)strlen(*p) != PyUnicode_GetSize(arg))
+                       else if (*p != NULL && uarg != NULL &&
+                               (Py_ssize_t) strlen(*p) != PyString_GET_SIZE(uarg))
                                 return converterr(
                                         "string without null bytes or None",
                                         arg, msgbuf, bufsize);
author	Amaury Forgeot d'Arc <amauryfa@gmail.com>
	Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
committer	Amaury Forgeot d'Arc <amauryfa@gmail.com>
	Mon, 12 May 2008 13:19:07 +0000 (13:19 +0000)
Misc/NEWS		patch | blob | history
Modules/_testcapimodule.c		patch | blob | history
Python/getargs.c		patch | blob | history