From 1317e1446805f1ab6b3299eef0a2910bab2887e0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Feb 2014 21:24:07 +0200 Subject: [PATCH] Issue #20368: The null character now correctly passed from Tcl to Python. Improved error handling in variables-related commands. --- Lib/test/test_tcl.py | 50 ++++- .../test/test_tkinter/test_variables.py | 18 ++ Misc/NEWS | 3 + Modules/_tkinter.c | 182 +++++++++++------- 4 files changed, 175 insertions(+), 78 deletions(-) diff --git a/Lib/test/test_tcl.py b/Lib/test/test_tcl.py index b73a11dbfc..308751de05 100644 --- a/Lib/test/test_tcl.py +++ b/Lib/test/test_tcl.py @@ -55,6 +55,10 @@ class TclTest(unittest.TestCase): tcl.eval('set a 1') self.assertEqual(tcl.eval('set a'),'1') + def test_eval_null_in_result(self): + tcl = self.interp + self.assertEqual(tcl.eval('set a "a\\0b"'), 'a\x00b') + def testEvalException(self): tcl = self.interp self.assertRaises(TclError,tcl.eval,'set a') @@ -127,20 +131,29 @@ class TclTest(unittest.TestCase): def testEvalFile(self): tcl = self.interp - filename = "testEvalFile.tcl" - fd = open(filename,'w') - script = """set a 1 - set b 2 - set c [ expr $a + $b ] - """ - fd.write(script) - fd.close() - tcl.evalfile(filename) - os.remove(filename) + with open(support.TESTFN, 'w') as f: + self.addCleanup(support.unlink, support.TESTFN) + f.write("""set a 1 + set b 2 + set c [ expr $a + $b ] + """) + tcl.evalfile(support.TESTFN) self.assertEqual(tcl.eval('set a'),'1') self.assertEqual(tcl.eval('set b'),'2') self.assertEqual(tcl.eval('set c'),'3') + def test_evalfile_null_in_result(self): + tcl = self.interp + with open(support.TESTFN, 'w') as f: + self.addCleanup(support.unlink, support.TESTFN) + f.write(""" + set a "a\0b" + set b "a\\0b" + """) + tcl.evalfile(support.TESTFN) + self.assertEqual(tcl.eval('set a'), 'a\x00b') + self.assertEqual(tcl.eval('set b'), 'a\x00b') + def testEvalFileException(self): tcl = self.interp filename = "doesnotexists" @@ -209,6 +222,7 @@ class TclTest(unittest.TestCase): check('"abc"', 'abc') check('"a\xbd\u20ac"', 'a\xbd\u20ac') check(r'"a\xbd\u20ac"', 'a\xbd\u20ac') + check(r'"a\0b"', 'a\x00b') def test_exprdouble(self): tcl = self.interp @@ -320,6 +334,11 @@ class TclTest(unittest.TestCase): self.assertEqual(passValue(False), False if self.wantobjects else '0') self.assertEqual(passValue('string'), 'string') self.assertEqual(passValue('string\u20ac'), 'string\u20ac') + self.assertEqual(passValue('str\x00ing'), 'str\x00ing') + self.assertEqual(passValue('str\x00ing\xbd'), 'str\x00ing\xbd') + self.assertEqual(passValue('str\x00ing\u20ac'), 'str\x00ing\u20ac') + self.assertEqual(passValue(b'str\x00ing'), 'str\x00ing') + self.assertEqual(passValue(b'str\xc0\x80ing'), 'str\x00ing') for i in (0, 1, -1, 2**31-1, -2**31): self.assertEqual(passValue(i), i if self.wantobjects else str(i)) for f in (0.0, 1.0, -1.0, 1/3, @@ -368,6 +387,13 @@ class TclTest(unittest.TestCase): check('string', 'string') check('string\xbd', 'string\xbd') check('string\u20ac', 'string\u20ac') + check(b'string', 'string') + check(b'string\xe2\x82\xac', 'string\u20ac') + check('str\x00ing', 'str\x00ing') + check('str\x00ing\xbd', 'str\x00ing\xbd') + check('str\x00ing\u20ac', 'str\x00ing\u20ac') + check(b'str\xc0\x80ing', 'str\x00ing') + check(b'str\xc0\x80ing\xe2\x82\xac', 'str\x00ing\u20ac') for i in (0, 1, -1, 2**31-1, -2**31): check(i, str(i)) for f in (0.0, 1.0, -1.0): @@ -396,6 +422,7 @@ class TclTest(unittest.TestCase): (b'a\n b\t\r c\n ', ('a', 'b', 'c')), ('a \u20ac', ('a', '\u20ac')), (b'a \xe2\x82\xac', ('a', '\u20ac')), + (b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')), ('a {b c}', ('a', 'b c')), (r'a b\ c', ('a', 'b c')), (('a', 'b c'), ('a', 'b c')), @@ -438,6 +465,9 @@ class TclTest(unittest.TestCase): (b'a\n b\t\r c\n ', ('a', 'b', 'c')), ('a \u20ac', ('a', '\u20ac')), (b'a \xe2\x82\xac', ('a', '\u20ac')), + (b'a\xc0\x80b', 'a\x00b'), + (b'a\xc0\x80b c\xc0\x80d', ('a\x00b', 'c\x00d')), + (b'{a\xc0\x80b c\xc0\x80d', '{a\x00b c\x00d'), ('a {b c}', ('a', ('b', 'c'))), (r'a b\ c', ('a', ('b', 'c'))), (('a', b'b c'), ('a', ('b', 'c'))), diff --git a/Lib/tkinter/test/test_tkinter/test_variables.py b/Lib/tkinter/test/test_tkinter/test_variables.py index fa1f89842f..9d910ac233 100644 --- a/Lib/tkinter/test/test_tkinter/test_variables.py +++ b/Lib/tkinter/test/test_tkinter/test_variables.py @@ -68,6 +68,18 @@ class TestVariable(TestBase): with self.assertRaises(TypeError): Variable(self.root, name=123) + def test_null_in_name(self): + with self.assertRaises(ValueError): + Variable(self.root, name='var\x00name') + with self.assertRaises(ValueError): + self.root.globalsetvar('var\x00name', "value") + with self.assertRaises(ValueError): + self.root.globalsetvar(b'var\x00name', "value") + with self.assertRaises(ValueError): + self.root.setvar('var\x00name', "value") + with self.assertRaises(ValueError): + self.root.setvar(b'var\x00name', "value") + def test_initialize(self): v = Var() self.assertFalse(v.side_effect) @@ -87,6 +99,12 @@ class TestStringVar(TestBase): self.root.globalsetvar("name", "value") self.assertEqual("value", v.get()) + def test_get_null(self): + v = StringVar(self.root, "abc\x00def", "name") + self.assertEqual("abc\x00def", v.get()) + self.root.globalsetvar("name", "val\x00ue") + self.assertEqual("val\x00ue", v.get()) + class TestIntVar(TestBase): diff --git a/Misc/NEWS b/Misc/NEWS index 66137276a1..66d9db7952 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -45,6 +45,9 @@ Core and Builtins Library ------- +- Issue #20368: The null character now correctly passed from Tcl to Python. + Improved error handling in variables-related commands. + - Issue #20435: Fix _pyio.StringIO.getvalue() to take into account newline translation settings. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 6bf0b69164..b106b4b901 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -418,6 +418,51 @@ Merge(PyObject *args) +static PyObject * +unicodeFromTclStringAndSize(const char *s, Py_ssize_t size) +{ + PyObject *r = PyUnicode_DecodeUTF8(s, size, NULL); + if (!r && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { + /* Tcl encodes null character as \xc0\x80 */ + if (memchr(s, '\xc0', size)) { + char *buf, *q; + const char *e = s + size; + PyErr_Clear(); + q = buf = (char *)PyMem_Malloc(size); + if (buf == NULL) + return NULL; + while (s != e) { + if (s + 1 != e && s[0] == '\xc0' && s[1] == '\x80') { + *q++ = '\0'; + s += 2; + } + else + *q++ = *s++; + } + s = buf; + size = q - s; + r = PyUnicode_DecodeUTF8(s, size, NULL); + PyMem_Free(buf); + } + } + return r; +} + +static PyObject * +unicodeFromTclString(const char *s) +{ + return unicodeFromTclStringAndSize(s, strlen(s)); +} + +static PyObject * +unicodeFromTclObj(Tcl_Obj *value) +{ + int len; + char *s = Tcl_GetStringFromObj(value, &len); + return unicodeFromTclStringAndSize(s, len); +} + + static PyObject * Split(char *list) { @@ -435,13 +480,13 @@ Split(char *list) * Could be a quoted string containing funnies, e.g. {"}. * Return the string itself. */ - return PyUnicode_FromString(list); + return unicodeFromTclString(list); } if (argc == 0) v = PyUnicode_FromString(""); else if (argc == 1) - v = PyUnicode_FromString(argv[0]); + v = unicodeFromTclString(argv[0]); else if ((v = PyTuple_New(argc)) != NULL) { int i; PyObject *w; @@ -780,11 +825,8 @@ PyDoc_STRVAR(PyTclObject_string__doc__, static PyObject * PyTclObject_string(PyTclObject *self, void *ignored) { - char *s; - int len; if (!self->string) { - s = Tcl_GetStringFromObj(self->value, &len); - self->string = PyUnicode_FromStringAndSize(s, len); + self->string = unicodeFromTclObj(self->value); if (!self->string) return NULL; } @@ -795,15 +837,12 @@ PyTclObject_string(PyTclObject *self, void *ignored) static PyObject * PyTclObject_str(PyTclObject *self, void *ignored) { - char *s; - int len; - if (self->string && PyUnicode_Check(self->string)) { + if (self->string) { Py_INCREF(self->string); return self->string; } /* XXX Could chache result if it is non-ASCII. */ - s = Tcl_GetStringFromObj(self->value, &len); - return PyUnicode_DecodeUTF8(s, len, "strict"); + return unicodeFromTclObj(self->value); } static PyObject * @@ -873,7 +912,7 @@ PyDoc_STRVAR(get_typename__doc__, "name of the Tcl type"); static PyObject* get_typename(PyTclObject* obj, void* ignored) { - return PyUnicode_FromString(obj->value->typePtr->name); + return unicodeFromTclString(obj->value->typePtr->name); } @@ -985,6 +1024,8 @@ AsObj(PyObject *value) return NULL; } kind = PyUnicode_KIND(value); + if (kind == sizeof(Tcl_UniChar)) + return Tcl_NewUnicodeObj(inbuf, size); allocsize = ((size_t)size) * sizeof(Tcl_UniChar); outbuf = (Tcl_UniChar*)ckalloc(allocsize); /* Else overflow occurred, and we take the next exit */ @@ -1035,8 +1076,7 @@ FromObj(PyObject* tkapp, Tcl_Obj *value) TkappObject *app = (TkappObject*)tkapp; if (value->typePtr == NULL) { - return PyUnicode_FromStringAndSize(value->bytes, - value->length); + return unicodeFromTclStringAndSize(value->bytes, value->length); } if (value->typePtr == app->BooleanType) { @@ -1093,15 +1133,9 @@ FromObj(PyObject* tkapp, Tcl_Obj *value) } if (value->typePtr == app->StringType) { -#if TCL_UTF_MAX==3 - return PyUnicode_FromKindAndData( - PyUnicode_2BYTE_KIND, Tcl_GetUnicode(value), - Tcl_GetCharLength(value)); -#else return PyUnicode_FromKindAndData( - PyUnicode_4BYTE_KIND, Tcl_GetUnicode(value), + sizeof(Tcl_UniChar), Tcl_GetUnicode(value), Tcl_GetCharLength(value)); -#endif } return newPyTclObject(value); @@ -1195,8 +1229,8 @@ static PyObject* Tkapp_CallResult(TkappObject *self) { PyObject *res = NULL; + Tcl_Obj *value = Tcl_GetObjResult(self->interp); if(self->wantobjects) { - Tcl_Obj *value = Tcl_GetObjResult(self->interp); /* Not sure whether the IncrRef is necessary, but something may overwrite the interpreter result while we are converting it. */ @@ -1204,7 +1238,7 @@ Tkapp_CallResult(TkappObject *self) res = FromObj((PyObject*)self, value); Tcl_DecrRefCount(value); } else { - res = PyUnicode_FromString(Tcl_GetStringResult(self->interp)); + res = unicodeFromTclObj(value); } return res; } @@ -1395,7 +1429,7 @@ Tkapp_Eval(PyObject *self, PyObject *args) if (err == TCL_ERROR) res = Tkinter_Error(self); else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = unicodeFromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1445,9 +1479,8 @@ Tkapp_EvalFile(PyObject *self, PyObject *args) ENTER_OVERLAP if (err == TCL_ERROR) res = Tkinter_Error(self); - else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = unicodeFromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1470,7 +1503,7 @@ Tkapp_Record(PyObject *self, PyObject *args) if (err == TCL_ERROR) res = Tkinter_Error(self); else - res = PyUnicode_FromString(Tkapp_Result(self)); + res = unicodeFromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1517,20 +1550,42 @@ typedef struct VarEvent { static int varname_converter(PyObject *in, void *_out) { + char *s; char **out = (char**)_out; if (PyBytes_Check(in)) { - *out = PyBytes_AsString(in); + if (PyBytes_Size(in) > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "bytes object is too long"); + return 0; + } + s = PyBytes_AsString(in); + if (strlen(s) != PyBytes_Size(in)) { + PyErr_SetString(PyExc_ValueError, "null byte in bytes object"); + return 0; + } + *out = s; return 1; } if (PyUnicode_Check(in)) { - *out = _PyUnicode_AsString(in); + Py_ssize_t size; + s = PyUnicode_AsUTF8AndSize(in, &size); + if (size > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, "string is too long"); + return 0; + } + if (strlen(s) != size) { + PyErr_SetString(PyExc_ValueError, "null character in string"); + return 0; + } + *out = s; return 1; } if (PyTclObject_Check(in)) { *out = PyTclObject_TclString(in); return 1; } - /* XXX: Should give diagnostics. */ + PyErr_Format(PyExc_TypeError, + "must be str, bytes or Tcl_Obj, not %.50s", + in->ob_type->tp_name); return 0; } @@ -1616,8 +1671,11 @@ SetVar(PyObject *self, PyObject *args, int flags) PyObject *res = NULL; Tcl_Obj *newval, *ok; - if (PyArg_ParseTuple(args, "O&O:setvar", - varname_converter, &name1, &newValue)) { + switch (PyTuple_GET_SIZE(args)) { + case 2: + if (!PyArg_ParseTuple(args, "O&O:setvar", + varname_converter, &name1, &newValue)) + return NULL; /* XXX Acquire tcl lock??? */ newval = AsObj(newValue); if (newval == NULL) @@ -1633,27 +1691,27 @@ SetVar(PyObject *self, PyObject *args, int flags) Py_INCREF(res); } LEAVE_OVERLAP_TCL - } - else { - PyErr_Clear(); - if (PyArg_ParseTuple(args, "ssO:setvar", - &name1, &name2, &newValue)) { - /* XXX must hold tcl lock already??? */ - newval = AsObj(newValue); - ENTER_TCL - ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags); - ENTER_OVERLAP - if (!ok) - Tkinter_Error(self); - else { - res = Py_None; - Py_INCREF(res); - } - LEAVE_OVERLAP_TCL - } - else { + break; + case 3: + if (!PyArg_ParseTuple(args, "ssO:setvar", + &name1, &name2, &newValue)) return NULL; + /* XXX must hold tcl lock already??? */ + newval = AsObj(newValue); + ENTER_TCL + ok = Tcl_SetVar2Ex(Tkapp_Interp(self), name1, name2, newval, flags); + ENTER_OVERLAP + if (!ok) + Tkinter_Error(self); + else { + res = Py_None; + Py_INCREF(res); } + LEAVE_OVERLAP_TCL + break; + default: + PyErr_SetString(PyExc_TypeError, "setvar requires 2 to 3 arguments"); + return NULL; } return res; } @@ -1693,7 +1751,7 @@ GetVar(PyObject *self, PyObject *args, int flags) res = FromObj(self, tres); } else { - res = PyUnicode_FromString(Tcl_GetString(tres)); + res = unicodeFromTclObj(tres); } } LEAVE_OVERLAP_TCL @@ -1831,7 +1889,7 @@ Tkapp_ExprString(PyObject *self, PyObject *args) if (retval == TCL_ERROR) res = Tkinter_Error(self); else - res = Py_BuildValue("s", Tkapp_Result(self)); + res = unicodeFromTclString(Tkapp_Result(self)); LEAVE_OVERLAP_TCL return res; } @@ -1956,7 +2014,7 @@ Tkapp_SplitList(PyObject *self, PyObject *args) goto finally; for (i = 0; i < argc; i++) { - PyObject *s = PyUnicode_FromString(argv[i]); + PyObject *s = unicodeFromTclString(argv[i]); if (!s || PyTuple_SetItem(v, i, s)) { Py_DECREF(v); v = NULL; @@ -2075,20 +2133,8 @@ PythonCmd(ClientData clientData, Tcl_Interp *interp, int argc, char *argv[]) return PythonCmd_Error(interp); for (i = 0; i < (argc - 1); i++) { - PyObject *s = PyUnicode_FromString(argv[i + 1]); - if (!s) { - /* Is Tk leaking 0xC080 in %A - a "modified" utf-8 null? */ - if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError) && - !strcmp(argv[i + 1], "\xC0\x80")) { - PyErr_Clear(); - /* Convert to "strict" utf-8 null */ - s = PyUnicode_FromString("\0"); - } else { - Py_DECREF(arg); - return PythonCmd_Error(interp); - } - } - if (PyTuple_SetItem(arg, i, s)) { + PyObject *s = unicodeFromTclString(argv[i + 1]); + if (!s || PyTuple_SetItem(arg, i, s)) { Py_DECREF(arg); return PythonCmd_Error(interp); } -- 2.40.0