From: Serhiy Storchaka Date: Mon, 4 Feb 2013 16:28:01 +0000 (+0200) Subject: Issue #17089: Expat parser now correctly works with string input not only when X-Git-Tag: v3.3.1rc1~226 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=36b365ccff21cdc563a3f4209b0bbaa079572487;p=python Issue #17089: Expat parser now correctly works with string input not only when an internal XML encoding is UTF-8 or US-ASCII. It now accepts bytes and strings larger than 2 GiB. --- 36b365ccff21cdc563a3f4209b0bbaa079572487 diff --cc Misc/NEWS index 8ca4b01c61,07a33cee71..ce24d0e4ab --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -163,6 -212,13 +163,10 @@@ Core and Builtin Library ------- + - Issue #17089: Expat parser now correctly works with string input not only when + an internal XML encoding is UTF-8 or US-ASCII. It now accepts bytes and + strings larger than 2 GiB. + -- Issue #16903: Popen.communicate() on Unix now accepts strings when - universal_newlines is true as on Windows. - - Issue #6083: Fix multiple segmentation faults occured when PyArg_ParseTuple parses nested mutating sequence. diff --cc Modules/pyexpat.c index 3f59f0fdc1,9d22d3a051..022b0cbaf9 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@@ -781,14 -782,47 +783,44 @@@ Parse XML data. `isfinal' should be tr static PyObject * xmlparse_Parse(xmlparseobject *self, PyObject *args) { - char *s; - int slen; + PyObject *data; int isFinal = 0; + const char *s; + Py_ssize_t slen; + Py_buffer view; + int rc; - if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal)) + if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal)) return NULL; - return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal)); + if (PyUnicode_Check(data)) { - PyObject *bytes; - bytes = PyUnicode_AsUTF8String(data); - if (bytes == NULL) - return NULL; + view.buf = NULL; - s = PyBytes_AS_STRING(bytes); - slen = PyBytes_GET_SIZE(bytes); ++ s = PyUnicode_AsUTF8AndSize(data, &slen); ++ if (s == NULL) ++ return NULL; + /* Explicitly set UTF-8 encoding. Return code ignored. */ + (void)XML_SetEncoding(self->itself, "utf-8"); + } + else { + if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0) + return NULL; + s = view.buf; + slen = view.len; + } + + while (slen > MAX_CHUNK_SIZE) { + rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0); + if (!rc) + goto done; + s += MAX_CHUNK_SIZE; + slen -= MAX_CHUNK_SIZE; + } + rc = XML_Parse(self->itself, s, slen, isFinal); + + done: + if (view.buf != NULL) + PyBuffer_Release(&view); + return get_parse_result(self, rc); } /* File reading copied from cPickle */