From: Serhiy Storchaka Date: Mon, 3 Feb 2014 19:23:46 +0000 (+0200) Subject: Issue #20368: The null character now correctly passed from Tcl to Python (in X-Git-Tag: v2.7.8~55 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ccffb25c5496b00cbe519e6d0e1038f61cdec563;p=python Issue #20368: The null character now correctly passed from Tcl to Python (in unicode strings only). Improved error handling in variables-related commands. --- diff --git a/Lib/lib-tk/test/test_tkinter/test_variables.py b/Lib/lib-tk/test/test_tkinter/test_variables.py index fe64ed2de6..a24ea38756 100644 --- a/Lib/lib-tk/test/test_tkinter/test_variables.py +++ b/Lib/lib-tk/test/test_tkinter/test_variables.py @@ -58,6 +58,14 @@ class TestVariable(TestBase): with self.assertRaises(TypeError): Variable(self.root, name=123) + def test_null_in_name(self): + with self.assertRaises(ValueError): + Variable(self.root, name='var\x00name') + with self.assertRaises(ValueError): + self.root.globalsetvar('var\x00name', "value") + with self.assertRaises(ValueError): + self.root.setvar('var\x00name', "value") + class TestStringVar(TestBase): @@ -71,6 +79,12 @@ class TestStringVar(TestBase): self.root.globalsetvar("name", "value") self.assertEqual("value", v.get()) + def test_get_null(self): + v = StringVar(self.root, "abc\x00def", "name") + self.assertEqual("abc\x00def", v.get()) + self.root.globalsetvar("name", "val\x00ue") + self.assertEqual("val\x00ue", v.get()) + class TestIntVar(TestBase): diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py index 8f262d01ab..2dad5a0d1b 100644 --- a/Lib/test/test_tcl.py +++ b/Lib/test/test_tcl.py @@ -139,6 +139,18 @@ class TclTest(unittest.TestCase): self.assertEqual(tcl.eval('set b'),'2') self.assertEqual(tcl.eval('set c'),'3') + def test_evalfile_null_in_result(self): + tcl = self.interp + with open(test_support.TESTFN, 'wb') as f: + self.addCleanup(test_support.unlink, test_support.TESTFN) + f.write(""" + set a "a\0b" + set b "a\\0b" + """) + tcl.evalfile(test_support.TESTFN) + self.assertEqual(tcl.eval('set a'), 'a\xc0\x80b') + self.assertEqual(tcl.eval('set b'), 'a\xc0\x80b') + def testEvalFileException(self): tcl = self.interp filename = "doesnotexists" @@ -220,6 +232,7 @@ class TclTest(unittest.TestCase): check('"abc"', 'abc') check('"a\xc2\xbd\xe2\x82\xac"', 'a\xc2\xbd\xe2\x82\xac') check(r'"a\xbd\u20ac"', 'a\xc2\xbd\xe2\x82\xac') + check(r'"a\0b"', 'a\xc0\x80b') def test_exprdouble(self): tcl = self.interp @@ -326,8 +339,17 @@ class TclTest(unittest.TestCase): self.assertEqual(passValue(True), True if self.wantobjects else '1') self.assertEqual(passValue(False), False if self.wantobjects else '0') + self.assertEqual(passValue('string'), 'string') + self.assertEqual(passValue('string\xbd'), 'string\xbd') + self.assertEqual(passValue('string\xe2\x82\xac'), u'string\u20ac') self.assertEqual(passValue(u'string'), u'string') + self.assertEqual(passValue(u'string\xbd'), u'string\xbd') self.assertEqual(passValue(u'string\u20ac'), u'string\u20ac') + self.assertEqual(passValue('str\x00ing'), 'str\x00ing') + self.assertEqual(passValue('str\xc0\x80ing'), 'str\x00ing') + self.assertEqual(passValue(u'str\x00ing'), u'str\x00ing') + self.assertEqual(passValue(u'str\x00ing\xbd'), u'str\x00ing\xbd') + self.assertEqual(passValue(u'str\x00ing\u20ac'), u'str\x00ing\u20ac') for i in (0, 1, -1, int(2**31-1), int(-2**31)): self.assertEqual(passValue(i), i if self.wantobjects else str(i)) for f in (0.0, 1.0, -1.0, 1//3, 1/3.0, @@ -356,14 +378,16 @@ class TclTest(unittest.TestCase): result.append(arg) return arg self.interp.createcommand('testfunc', testfunc) - def check(value, expected, eq=self.assertEqual): + def check(value, expected, expected2=None, eq=self.assertEqual): + if expected2 is None: + expected2 = expected del result[:] r = self.interp.call('testfunc', value) self.assertEqual(len(result), 1) - self.assertIsInstance(result[0], str) - eq(result[0], expected) - self.assertIsInstance(r, str) - eq(r, expected) + self.assertIsInstance(result[0], (str, unicode)) + eq(result[0], expected2) + self.assertIsInstance(r, (str, unicode)) + eq(r, expected2) def float_eq(actual, expected): expected = float(expected) self.assertAlmostEqual(float(actual), expected, @@ -376,7 +400,15 @@ class TclTest(unittest.TestCase): check(False, '0') check('string', 'string') check('string\xbd', 'string\xbd') - check('string\u20ac', 'string\u20ac') + check('string\xe2\x82\xac', 'string\xe2\x82\xac', u'string\u20ac') + check(u'string', u'string') + check(u'string\xbd', 'string\xc2\xbd', u'string\xbd') + check(u'string\u20ac', 'string\xe2\x82\xac', u'string\u20ac') + check('str\xc0\x80ing', 'str\xc0\x80ing', u'str\x00ing') + check('str\xc0\x80ing\xe2\x82\xac', 'str\xc0\x80ing\xe2\x82\xac', u'str\x00ing\u20ac') + check(u'str\x00ing', 'str\xc0\x80ing', u'str\x00ing') + check(u'str\x00ing\xbd', 'str\xc0\x80ing\xc2\xbd', u'str\x00ing\xbd') + check(u'str\x00ing\u20ac', 'str\xc0\x80ing\xe2\x82\xac', u'str\x00ing\u20ac') for i in (0, 1, -1, 2**31-1, -2**31): check(i, str(i)) for f in (0.0, 1.0, -1.0): @@ -405,6 +437,7 @@ class TclTest(unittest.TestCase): (u'a\n b\t\r c\n ', ('a', 'b', 'c')), ('a \xe2\x82\xac', ('a', '\xe2\x82\xac')), (u'a \u20ac', ('a', '\xe2\x82\xac')), + ('a\xc0\x80b c\xc0\x80d', ('a\xc0\x80b', 'c\xc0\x80d')), ('a {b c}', ('a', 'b c')), (r'a b\ c', ('a', 'b c')), (('a', 'b c'), ('a', 'b c')), @@ -449,6 +482,8 @@ class TclTest(unittest.TestCase): (u'a\n b\t\r c\n ', ('a', 'b', 'c')), ('a \xe2\x82\xac', ('a', '\xe2\x82\xac')), (u'a \u20ac', ('a', '\xe2\x82\xac')), + ('a\xc0\x80b', 'a\xc0\x80b'), + ('a\xc0\x80b c\xc0\x80d', ('a\xc0\x80b', 'c\xc0\x80d')), ('a {b c}', ('a', ('b', 'c'))), (r'a b\ c', ('a', ('b', 'c'))), (('a', 'b c'), ('a', ('b', 'c'))), diff --git a/Misc/NEWS b/Misc/NEWS index 48a0085857..2b1dd0755b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -38,6 +38,9 @@ Core and Builtins Library ------- +- Issue #20368: The null character now correctly passed from Tcl to Python (in + unicode strings only). Improved error handling in variables-related commands. + - Issue #20435: Fix _pyio.StringIO.getvalue() to take into account newline translation settings. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 0627d60b67..8d52b891d5 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -456,6 +456,68 @@ Merge(PyObject *args) +#ifdef Py_USING_UNICODE +static PyObject * +unicode_FromTclStringAndSize(const char *s, Py_ssize_t size) +{ + PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL); + if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + /* Tcl encodes null character as \xc0\x80 */ + if (memchr(s, '\xc0', size)) { + char *buf, *q; + const char *e = s + size; + PyErr_Clear(); + q = buf = (char *)PyMem_Malloc(size); + if (buf == NULL) + return NULL; + while (s != e) { + if (s + 1 != e && s[0] == '\xc0' && s[1] == '\x80') { + *q++ = '\0'; + s += 2; + } + else + *q++ = *s++; + } + s = buf; + size = q - s; + r = PyUnicode_DecodeUTF8(s, size, NULL); + PyMem_Free(buf); + } + } + return r; +} +#endif + +static PyObject * +fromTclStringAndSize(const char *s, Py_ssize_t size) +{ + PyObject *r; +#ifdef Py_USING_UNICODE + Py_ssize_t i; + /* If Tcl string contains any bytes with the top bit set, + it's UTF-8 and we should decode it to Unicode */ + for (i = 0; i < size; i++) + if (s[i] & 0x80) + break; + if (i != size) { + /* It isn't an ASCII string. */ + r = unicode_FromTclStringAndSize(s, size); + if (r) + return r; + PyErr_Clear(); + } +#endif + r = PyString_FromStringAndSize(s, size); + return r; +} + +static PyObject * +fromTclString(const char *s) +{ + return fromTclStringAndSize(s, strlen(s)); +} + + static PyObject * Split(char *list) { @@ -841,27 +903,10 @@ PyDoc_STRVAR(PyTclObject_string__doc__, static PyObject * PyTclObject_string(PyTclObject *self, void *ignored) { - char *s; - int i, len; if (!self->string) { - s = Tcl_GetStringFromObj(self->value, &len); - for (i = 0; i < len; i++) - if (s[i] & 0x80) - break; -#ifdef Py_USING_UNICODE - if (i == len) - /* It is an ASCII string. */ - self->string = PyString_FromStringAndSize(s, len); - else { - self->string = PyUnicode_DecodeUTF8(s, len, "strict"); - if (!self->string) { - PyErr_Clear(); - self->string = PyString_FromStringAndSize(s, len); - } - } -#else - self->string = PyString_FromStringAndSize(s, len); -#endif + int len; + char *s = Tcl_GetStringFromObj(self->value, &len); + self->string = fromTclStringAndSize(s, len); if (!self->string) return NULL; } @@ -883,7 +928,7 @@ PyTclObject_unicode(PyTclObject *self, void *ignored) } /* XXX Could chache result if it is non-ASCII. */ s = Tcl_GetStringFromObj(self->value, &len); - return PyUnicode_DecodeUTF8(s, len, "strict"); + return unicode_FromTclStringAndSize(s, len); } #endif @@ -1022,6 +1067,8 @@ AsObj(PyObject *value) PyErr_SetString(PyExc_OverflowError, "string is too long"); return NULL; } + if (sizeof(Py_UNICODE) == sizeof(Tcl_UniChar)) + return Tcl_NewUnicodeObj(inbuf, size); allocsize = ((size_t)size) * sizeof(Tcl_UniChar); if (allocsize >= size) outbuf = (Tcl_UniChar*)ckalloc(allocsize); @@ -1073,30 +1120,7 @@ FromObj(PyObject* tkapp, Tcl_Obj *value) TkappObject *app = (TkappObject*)tkapp; if (value->typePtr == NULL) { - /* If the result contains any bytes with the top bit set, - it's UTF-8 and we should decode it to Unicode */ -#ifdef Py_USING_UNICODE - int i; - char *s = value->bytes; - int len = value->length; - for (i = 0; i < len; i++) { - if (value->bytes[i] & 0x80) - break; - } - - if (i == value->length) - result = PyString_FromStringAndSize(s, len); - else { - /* Convert UTF-8 to Unicode string */ - result = PyUnicode_DecodeUTF8(s, len, "strict"); - if (result == NULL) { - PyErr_Clear(); - result = PyString_FromStringAndSize(s, len); - } - } -#else - result = PyString_FromStringAndSize(value->bytes, value->length); -#endif + result = fromTclStringAndSize(value->bytes, value->length); return result; } @@ -1273,8 +1297,8 @@ static PyObject* Tkapp_CallResult(TkappObject *self) { PyObject *res = NULL; + Tcl_Obj *value = Tcl_GetObjResult(self->interp); if(self->wantobjects) { - Tcl_Obj *value = Tcl_GetObjResult(self->interp); /* Not sure whether the IncrRef is necessary, but something may overwrite the interpreter result while we are converting it. */ @@ -1282,33 +1306,9 @@ Tkapp_CallResult(TkappObject *self) res = FromObj((PyObject*)self, value); Tcl_DecrRefCount(value); } else { - const char *s = Tcl_GetStringResult(self->interp); - const char *p = s; - - /* If the result contains any bytes with the top bit set, - it's UTF-8 and we should decode it to Unicode */ -#ifdef Py_USING_UNICODE - while (*p != '\0') { - if (*p & 0x80) - break; - p++; - } - - if (*p == '\0') - res = PyString_FromStringAndSize(s, (int)(p-s)); - else { - /* Convert UTF-8 to Unicode string */ - p = strchr(p, '\0'); - res = PyUnicode_DecodeUTF8(s, (int)(p-s), "strict"); - if (res == NULL) { - PyErr_Clear(); - res = PyString_FromStringAndSize(s, (int)(p-s)); - } - } -#else - p = strchr(p, '\0'); - res = PyString_FromStringAndSize(s, (int)(p-s)); -#endif + int len; + const char *s = Tcl_GetStringFromObj(value, &len); + res = fromTclStringAndSize(s, len); } return res; } @@ -1611,16 +1611,28 @@ typedef struct VarEvent { static int varname_converter(PyObject *in, void *_out) { + char *s; char **out = (char**)_out; if (PyString_Check(in)) { - *out = PyString_AsString(in); + if (PyString_Size(in) > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "string is too long"); + return 0; + } + s = PyString_AsString(in); + if (strlen(s) != PyString_Size(in)) { + PyErr_SetString(PyExc_ValueError, "null character in string"); + return 0; + } + *out = s; return 1; } if (PyTclObject_Check(in)) { *out = PyTclObject_TclString(in); return 1; } - /* XXX: Should give diagnostics. */ + PyErr_Format(PyExc_TypeError, + "must be str or Tcl_Obj, not %.50s", + in->ob_type->tp_name); return 0; } @@ -1706,8 +1718,11 @@ SetVar(PyObject *self, PyObject *args, int flags) PyObject *res = NULL; Tcl_Obj *newval, *ok; - if (PyArg_ParseTuple(args, "O&O:setvar", - varname_converter, &name1, &newValue)) { + switch (PyTuple_GET_SIZE(args)) { + case 2: + if (!PyArg_ParseTuple(args, "O&O:setvar", + varname_converter, &name1, &newValue)) + return NULL; /* XXX Acquire tcl lock??? */ newval = AsObj(newValue); if (newval == NULL) @@ -1723,27 +1738,27 @@ SetVar(PyObject *self, PyObject *args, int flags) Py_INCREF(res); } LEAVE_OVERLAP_TCL - } - else { - PyErr_Clear(); - if (PyArg_ParseTuple(args, "ssO:setvar", - &name1, &name2, &newValue)) { - /* XXX must hold tcl lock already??? */ - newval = AsObj(newValue); - ENTER_TCL - ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags); - ENTER_OVERLAP - if (!ok) - Tkinter_Error(self); - else { - res = Py_None; - Py_INCREF(res); - } - LEAVE_OVERLAP_TCL - } - else { + break; + case 3: + if (!PyArg_ParseTuple(args, "ssO:setvar", + &name1, &name2, &newValue)) return NULL; + /* XXX must hold tcl lock already??? */ + newval = AsObj(newValue); + ENTER_TCL + ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags); + ENTER_OVERLAP + if (!ok) + Tkinter_Error(self); + else { + res = Py_None; + Py_INCREF(res); } + LEAVE_OVERLAP_TCL + break; + default: + PyErr_SetString(PyExc_TypeError, "setvar requires 2 to 3 arguments"); + return NULL; } return res; } @@ -1783,7 +1798,9 @@ GetVar(PyObject *self, PyObject *args, int flags) res = FromObj(self, tres); } else { - res = PyString_FromString(Tcl_GetString(tres)); + int len; + char *s = Tcl_GetStringFromObj(tres, &len); + res = PyString_FromStringAndSize(s, len); } } LEAVE_OVERLAP_TCL @@ -1921,7 +1938,7 @@ Tkapp_ExprString(PyObject *self, PyObject *args) if (retval == TCL_ERROR) res = Tkinter_Error(self); else - res = Py_BuildValue("s", Tkapp_Result(self)); + res = PyString_FromString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -2158,7 +2175,7 @@ PythonCmd(ClientData clientData, Tcl_Interp *interp, int argc, char *argv[]) return PythonCmd_Error(interp); for (i = 0; i < (argc - 1); i++) { - PyObject *s = PyString_FromString(argv[i + 1]); + PyObject *s = fromTclString(argv[i + 1]); if (!s || PyTuple_SetItem(arg, i, s)) { Py_DECREF(arg); return PythonCmd_Error(interp);