sample1 = ('<!DOCTYPE html PUBLIC'
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
- '<html>text</html>')
+ '<html>text<div>subtext</div>tail</html>')
sample2 = '''<toplevel>sometext</toplevel>'''
+ def _check_sample1_element(self, e):
+ self.assertEqual(e.tag, 'html')
+ self.assertEqual(e.text, 'text')
+ self.assertEqual(e.tail, None)
+ self.assertEqual(e.attrib, {})
+ children = list(e)
+ self.assertEqual(len(children), 1)
+ child = children[0]
+ self.assertEqual(child.tag, 'div')
+ self.assertEqual(child.text, 'subtext')
+ self.assertEqual(child.tail, 'tail')
+ self.assertEqual(child.attrib, {})
+
def test_dummy_builder(self):
class BaseDummyBuilder:
def close(self):
parser.feed(self.sample1)
e = parser.close()
- self.assertEqual(e.tag, 'html')
+ self._check_sample1_element(e)
def test_element_factory(self):
lst = []
self.assertEqual(lst, ['toplevel'])
+ def _check_element_factory_class(self, cls):
+ tb = ET.TreeBuilder(element_factory=cls)
+
+ parser = ET.XMLParser(target=tb)
+ parser.feed(self.sample1)
+ e = parser.close()
+ self.assertIsInstance(e, cls)
+ self._check_sample1_element(e)
+
+ def test_element_factory_subclass(self):
+ class MyElement(ET.Element):
+ pass
+ self._check_element_factory_class(MyElement)
+
+ def test_element_factory_pure_python_subclass(self):
+ # Mimick SimpleTAL's behaviour (issue #16089): both versions of
+ # TreeBuilder should be able to cope with a subclass of the
+ # pure Python Element class.
+ base = ET._Element
+ # Not from a C extension
+ self.assertEqual(base.__module__, 'xml.etree.ElementTree')
+ # Force some multiple inheritance with a C class to make things
+ # more interesting.
+ class MyElement(base, ValueError):
+ pass
+ self._check_element_factory_class(MyElement)
+
def test_doctype(self):
class DoctypeParser:
_doctype = None
PyObject *root; /* root node (first created node) */
- ElementObject *this; /* current node */
- ElementObject *last; /* most recently created node */
+ PyObject *this; /* current node */
+ PyObject *last; /* most recently created node */
PyObject *data; /* data collector (string or list), or NULL */
t->root = NULL;
Py_INCREF(Py_None);
- t->this = (ElementObject *)Py_None;
+ t->this = Py_None;
Py_INCREF(Py_None);
- t->last = (ElementObject *)Py_None;
+ t->last = Py_None;
t->data = NULL;
t->element_factory = NULL;
Py_TYPE(self)->tp_free((PyObject *)self);
}
+/* -------------------------------------------------------------------- */
+/* helpers for handling of arbitrary element-like objects */
+
+static int
+treebuilder_set_element_text_or_tail(PyObject *element, PyObject *data,
+ PyObject **dest, _Py_Identifier *name)
+{
+ if (Element_CheckExact(element)) {
+ Py_DECREF(JOIN_OBJ(*dest));
+ *dest = JOIN_SET(data, PyList_CheckExact(data));
+ return 0;
+ }
+ else {
+ PyObject *joined = list_join(data);
+ int r;
+ if (joined == NULL)
+ return -1;
+ r = _PyObject_SetAttrId(element, name, joined);
+ Py_DECREF(joined);
+ return r;
+ }
+}
+
+/* These two functions steal a reference to data */
+static int
+treebuilder_set_element_text(PyObject *element, PyObject *data)
+{
+ _Py_IDENTIFIER(text);
+ return treebuilder_set_element_text_or_tail(
+ element, data, &((ElementObject *) element)->text, &PyId_text);
+}
+
+static int
+treebuilder_set_element_tail(PyObject *element, PyObject *data)
+{
+ _Py_IDENTIFIER(tail);
+ return treebuilder_set_element_text_or_tail(
+ element, data, &((ElementObject *) element)->tail, &PyId_tail);
+}
+
+static int
+treebuilder_add_subelement(PyObject *element, PyObject *child)
+{
+ _Py_IDENTIFIER(append);
+ if (Element_CheckExact(element)) {
+ ElementObject *elem = (ElementObject *) element;
+ return element_add_subelement(elem, child);
+ }
+ else {
+ PyObject *res;
+ res = _PyObject_CallMethodId(element, &PyId_append, "O", child);
+ if (res == NULL)
+ return -1;
+ Py_DECREF(res);
+ return 0;
+ }
+}
+
/* -------------------------------------------------------------------- */
/* handlers */
if (self->data) {
if (self->this == self->last) {
- Py_DECREF(JOIN_OBJ(self->last->text));
- self->last->text = JOIN_SET(
- self->data, PyList_CheckExact(self->data)
- );
- } else {
- Py_DECREF(JOIN_OBJ(self->last->tail));
- self->last->tail = JOIN_SET(
- self->data, PyList_CheckExact(self->data)
- );
+ if (treebuilder_set_element_text(self->last, self->data))
+ return NULL;
+ }
+ else {
+ if (treebuilder_set_element_tail(self->last, self->data))
+ return NULL;
}
self->data = NULL;
}
return NULL;
}
- this = (PyObject*) self->this;
+ this = self->this;
if (this != Py_None) {
- if (element_add_subelement((ElementObject*) this, node) < 0)
+ if (treebuilder_add_subelement(this, node) < 0)
goto error;
} else {
if (self->root) {
Py_DECREF(this);
Py_INCREF(node);
- self->this = (ElementObject*) node;
+ self->this = node;
Py_DECREF(self->last);
Py_INCREF(node);
- self->last = (ElementObject*) node;
+ self->last = node;
if (self->start_event_obj) {
PyObject* res;
treebuilder_handle_data(TreeBuilderObject* self, PyObject* data)
{
if (!self->data) {
- if (self->last == (ElementObject*) Py_None) {
+ if (self->last == Py_None) {
/* ignore calls to data before the first call to start */
Py_RETURN_NONE;
}
if (self->data) {
if (self->this == self->last) {
- Py_DECREF(JOIN_OBJ(self->last->text));
- self->last->text = JOIN_SET(
- self->data, PyList_CheckExact(self->data)
- );
+ if (treebuilder_set_element_text(self->last, self->data))
+ return NULL;
} else {
- Py_DECREF(JOIN_OBJ(self->last->tail));
- self->last->tail = JOIN_SET(
- self->data, PyList_CheckExact(self->data)
- );
+ if (treebuilder_set_element_tail(self->last, self->data))
+ return NULL;
}
self->data = NULL;
}
Py_DECREF(self->last);
- self->last = (ElementObject*) self->this;
- self->this = (ElementObject*) item;
+ self->last = self->this;
+ self->this = item;
if (self->end_event_obj) {
PyObject* res;