self.assertEqual(b.pi('target'), (len('target'), None))
self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
+ def test_late_tail(self):
+ # Issue #37399: The tail of an ignored comment could overwrite the text before it.
+ # Each document below is parsed twice: once with ET.fromstring() and once
+ # with an XMLParser whose target is a TreeBuilder subclass (presumably so
+ # the parser cannot take its exact-TreeBuilder shortcut -- confirm against
+ # the C accelerator).
+ class TreeBuilderSubclass(ET.TreeBuilder):
+ pass
+
+ # Ignored comment: the character data on both sides of it must end up
+ # joined in a.text.
+ xml = "<a>text<!-- comment -->tail</a>"
+ a = ET.fromstring(xml)
+ self.assertEqual(a.text, "texttail")
+
+ # Same document, fed through the subclassed builder.
+ parser = ET.XMLParser(target=TreeBuilderSubclass())
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a.text, "texttail")
+
+ # Ignored processing instruction: same expectation as for the comment.
+ xml = "<a>text<?pi data?>tail</a>"
+ a = ET.fromstring(xml)
+ self.assertEqual(a.text, "texttail")
+
+ # Same PI document again (xml is reassigned to the identical string),
+ # fed through the subclassed builder.
+ xml = "<a>text<?pi data?>tail</a>"
+ parser = ET.XMLParser(target=TreeBuilderSubclass())
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a.text, "texttail")
+
+ def test_late_tail_mix_pi_comments(self):
+ # Issue #37399: The tail of an ignored comment could overwrite the text before it.
+ # Test appending tails to comments/pis.
+ # Each scenario runs against both ET.TreeBuilder and a trivial subclass
+ # of it, and the same three assertions are expected to hold for both.
+ class TreeBuilderSubclass(ET.TreeBuilder):
+ pass
+
+ # Only comments are inserted here.  The expected tree has the comment
+ # as a[0]; the " " following <?pi1?> is joined onto a.text, and the
+ # "\n" and "tail" on either side of <?pi2?> are joined into the
+ # comment's tail.
+ xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
+ parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a[0].text, ' comment ')
+ self.assertEqual(a[0].tail, '\ntail')
+ self.assertEqual(a.text, "text ")
+
+ # Same document and expectations with the subclassed builder.
+ parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a[0].text, ' comment ')
+ self.assertEqual(a[0].tail, '\ntail')
+ self.assertEqual(a.text, "text ")
+
+ # Only processing instructions are inserted here.  The expected tree
+ # has the PI as a[0]; the "\n" following the comment is joined onto
+ # a.text, and "tail" becomes the PI's tail.
+ xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
+ parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a[0].text, 'pi data')
+ self.assertEqual(a[0].tail, 'tail')
+ self.assertEqual(a.text, "text\n")
+
+ # Same document and expectations with the subclassed builder.
+ parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
+ parser.feed(xml)
+ a = parser.close()
+ self.assertEqual(a[0].text, 'pi data')
+ self.assertEqual(a[0].tail, 'tail')
+ self.assertEqual(a.text, "text\n")
+
def test_treebuilder_elementfactory_none(self):
parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
parser.feed(self.sample1)
PyObject *this; /* current node */
PyObject *last; /* most recently created node */
+ PyObject *last_for_tail; /* most recently created node that takes a tail */
PyObject *data; /* data collector (string or list), or NULL */
Py_VISIT(self->root);
Py_VISIT(self->this);
Py_VISIT(self->last);
+ Py_VISIT(self->last_for_tail);
Py_VISIT(self->data);
Py_VISIT(self->stack);
Py_VISIT(self->pi_factory);
Py_CLEAR(self->stack);
Py_CLEAR(self->data);
Py_CLEAR(self->last);
+ Py_CLEAR(self->last_for_tail);
Py_CLEAR(self->this);
Py_CLEAR(self->pi_factory);
Py_CLEAR(self->comment_factory);
}
static int
-treebuilder_set_element_text_or_tail(PyObject *element, PyObject **data,
- PyObject **dest, _Py_Identifier *name)
+treebuilder_extend_element_text_or_tail(PyObject *element, PyObject **data,
+ PyObject **dest, _Py_Identifier *name)
{
+ /* Fast paths for the "almost always" cases. */
if (Element_CheckExact(element)) {
- PyObject *tmp = JOIN_OBJ(*dest);
- *dest = JOIN_SET(*data, PyList_CheckExact(*data));
- *data = NULL;
- Py_DECREF(tmp);
- return 0;
+ PyObject *dest_obj = JOIN_OBJ(*dest);
+ if (dest_obj == Py_None) {
+ *dest = JOIN_SET(*data, PyList_CheckExact(*data));
+ *data = NULL;
+ Py_DECREF(dest_obj);
+ return 0;
+ }
+ else if (JOIN_GET(*dest)) {
+ if (PyList_SetSlice(dest_obj, PY_SSIZE_T_MAX, PY_SSIZE_T_MAX, *data) < 0) {
+ return -1;
+ }
+ Py_CLEAR(*data);
+ return 0;
+ }
}
- else {
- PyObject *joined = list_join(*data);
+
+ /* Fallback for the non-Element / non-trivial cases. */
+ {
int r;
- if (joined == NULL)
+ PyObject* joined;
+ PyObject* previous = _PyObject_GetAttrId(element, name);
+ if (!previous)
+ return -1;
+ joined = list_join(*data);
+ if (!joined) {
+ Py_DECREF(previous);
return -1;
+ }
+ if (previous != Py_None) {
+ PyObject *tmp = PyNumber_Add(previous, joined);
+ Py_DECREF(joined);
+ Py_DECREF(previous);
+ if (!tmp)
+ return -1;
+ joined = tmp;
+ } else {
+ Py_DECREF(previous);
+ }
+
r = _PyObject_SetAttrId(element, name, joined);
Py_DECREF(joined);
if (r < 0)
+/* Attach the character data buffered in self->data to the tree.
+ *
+ * Returns 0 immediately when nothing is buffered; otherwise returns the
+ * result of treebuilder_extend_element_text_or_tail(), which is handed
+ * &self->data together with the destination slot and attribute name.
+ */
LOCAL(int)
treebuilder_flush_data(TreeBuilderObject* self)
{
- PyObject *element = self->last;
-
if (!self->data) {
return 0;
}
- if (self->this == element) {
+ /* No node is recorded as taking a tail: extend the text of self->last. */
+ if (!self->last_for_tail) {
+ PyObject *element = self->last;
_Py_IDENTIFIER(text);
- return treebuilder_set_element_text_or_tail(
+ return treebuilder_extend_element_text_or_tail(
element, &self->data,
&((ElementObject *) element)->text, &PyId_text);
}
else {
+ /* Otherwise extend the tail of self->last_for_tail. */
+ PyObject *element = self->last_for_tail;
_Py_IDENTIFIER(tail);
- return treebuilder_set_element_text_or_tail(
+ return treebuilder_extend_element_text_or_tail(
element, &self->data,
&((ElementObject *) element)->tail, &PyId_tail);
}
}
this = self->this;
+ Py_CLEAR(self->last_for_tail);
if (this != Py_None) {
if (treebuilder_add_subelement(this, node) < 0)
item = self->last;
self->last = self->this;
+ Py_INCREF(self->last);
+ Py_XSETREF(self->last_for_tail, self->last);
self->index--;
self->this = PyList_GET_ITEM(self->stack, self->index);
Py_INCREF(self->this);
LOCAL(PyObject*)
treebuilder_handle_comment(TreeBuilderObject* self, PyObject* text)
{
- PyObject* comment = NULL;
+ PyObject* comment;
PyObject* this;
if (treebuilder_flush_data(self) < 0) {
if (self->insert_comments && this != Py_None) {
if (treebuilder_add_subelement(this, comment) < 0)
goto error;
+ Py_INCREF(comment);
+ Py_XSETREF(self->last_for_tail, comment);
}
} else {
Py_INCREF(text);
LOCAL(PyObject*)
treebuilder_handle_pi(TreeBuilderObject* self, PyObject* target, PyObject* text)
{
- PyObject* pi = NULL;
+ PyObject* pi;
PyObject* this;
PyObject* stack[2] = {target, text};
if (self->insert_pis && this != Py_None) {
if (treebuilder_add_subelement(this, pi) < 0)
goto error;
+ Py_INCREF(pi);
+ Py_XSETREF(self->last_for_tail, pi);
}
} else {
pi = PyTuple_Pack(2, target, text);
static void
expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
{
- PyObject* comment = NULL;
- PyObject* res = NULL;
+ PyObject* comment;
+ PyObject* res;
if (PyErr_Occurred())
return;
return; /* parser will look for errors */
res = treebuilder_handle_comment(target, comment);
+ Py_XDECREF(res);
+ Py_DECREF(comment);
} else if (self->handle_comment) {
comment = PyUnicode_DecodeUTF8(comment_in, strlen(comment_in), "strict");
if (!comment)
return;
res = _PyObject_FastCall(self->handle_comment, &comment, 1);
+ Py_XDECREF(res);
+ Py_DECREF(comment);
}
-
- Py_XDECREF(res);
- Py_DECREF(comment);
}
static void
expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
const XML_Char* data_in)
{
- PyObject* pi_target = NULL;
+ PyObject* pi_target;
PyObject* data;
PyObject* res;
PyObject* stack[2];
/* shortcut */
TreeBuilderObject *target = (TreeBuilderObject*) self->target;
- if (target->events_append && target->pi_event_obj) {
+ if ((target->events_append && target->pi_event_obj) || target->insert_pis) {
pi_target = PyUnicode_DecodeUTF8(target_in, strlen(target_in), "strict");
if (!pi_target)
goto error;