from test import support
from test.support import findfile, import_fresh_module, gc_collect
-pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree'])
+pyET = None
+ET = None
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
These methods return an iterable. See bug 6472.
- >>> check_method(element.iter("tag").__next__)
>>> check_method(element.iterfind("tag").__next__)
>>> check_method(element.iterfind("*").__next__)
- >>> check_method(tree.iter("tag").__next__)
>>> check_method(tree.iterfind("tag").__next__)
>>> check_method(tree.iterfind("*").__next__)
'<tag>hello</tag>'
"""
-# Only with Python implementation
-def simplefind():
- """
- Test find methods using the elementpath fallback.
-
- >>> ElementTree = pyET
-
- >>> CurrentElementPath = ElementTree.ElementPath
- >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
- >>> elem = ElementTree.XML(SAMPLE_XML)
- >>> elem.find("tag").tag
- 'tag'
- >>> ElementTree.ElementTree(elem).find("tag").tag
- 'tag'
- >>> elem.findtext("tag")
- 'text'
- >>> elem.findtext("tog")
- >>> elem.findtext("tog", "default")
- 'default'
- >>> ElementTree.ElementTree(elem).findtext("tag")
- 'text'
- >>> summarize_list(elem.findall("tag"))
- ['tag', 'tag']
- >>> summarize_list(elem.findall(".//tag"))
- ['tag', 'tag', 'tag']
-
- Path syntax doesn't work in this case.
-
- >>> elem.find("section/tag")
- >>> elem.findtext("section/tag")
- >>> summarize_list(elem.findall("section/tag"))
- []
-
- >>> ElementTree.ElementPath = CurrentElementPath
- """
-
def find():
"""
Test find methods (including xpath syntax).
'1 < 2\n'
"""
-def iterators():
- """
- Test iterators.
-
- >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
- >>> summarize_list(e.iter())
- ['html', 'body', 'i']
- >>> summarize_list(e.find("body").iter())
- ['body', 'i']
- >>> summarize(next(e.iter()))
- 'html'
- >>> "".join(e.itertext())
- 'this is a paragraph...'
- >>> "".join(e.find("body").itertext())
- 'this is a paragraph.'
- >>> next(e.itertext())
- 'this is a '
-
- Method iterparse should return an iterator. See bug 6472.
-
- >>> sourcefile = serialize(e, to_string=False)
- >>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
- ('end', <Element 'i' at 0x...>)
-
- >>> tree = ET.ElementTree(None)
- >>> tree.iter()
- Traceback (most recent call last):
- AttributeError: 'NoneType' object has no attribute 'iter'
- """
-
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))
+
def xinclude_loader(href, parse="xml", encoding=None):
try:
data = XINCLUDE[href]
>>> # print(serialize(document)) # C5
"""
-def xinclude_default():
- """
- >>> from xml.etree import ElementInclude
-
- >>> document = xinclude_loader("default.xml")
- >>> ElementInclude.include(document)
- >>> print(serialize(document)) # default
- <document>
- <p>Example.</p>
- <root>
- <element key="value">text</element>
- <element>text</element>tail
- <empty-element />
- </root>
- </document>
- """
#
# badly formatted xi:include tags
self.assertIsInstance(ET.QName, type)
self.assertIsInstance(ET.ElementTree, type)
self.assertIsInstance(ET.Element, type)
- # XXX issue 14128 with C ElementTree
- # self.assertIsInstance(ET.TreeBuilder, type)
- # self.assertIsInstance(ET.XMLParser, type)
+ self.assertIsInstance(ET.TreeBuilder, type)
+ self.assertIsInstance(ET.XMLParser, type)
def test_Element_subclass_trivial(self):
class MyElement(ET.Element):
self.assertEqual(mye.newmethod(), 'joe')
+class ElementIterTest(unittest.TestCase):
+ # Exercises Element.iter(), Element.itertext() and ET.iterparse() on
+ # whichever implementation the module-level name ET is bound to.
+ def _ilist(self, elem, tag=None):
+ # Run elem.iter(tag) through summarize_list() (defined elsewhere in this
+ # file); the assertions below compare the result with lists of tag names.
+ return summarize_list(elem.iter(tag))
+
+ def test_basic(self):
+ # Document-order iteration over elements and over text chunks.
+ doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
+ self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
+ self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
+ self.assertEqual(next(doc.iter()).tag, 'html')
+ self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
+ # itertext() on a sub-element must not include that element's own tail
+ # (the '..' following </body>).
+ self.assertEqual(''.join(doc.find('body').itertext()),
+ 'this is a paragraph.')
+ self.assertEqual(next(doc.itertext()), 'this is a ')
+
+ # iterparse should return an iterator
+ sourcefile = serialize(doc, to_string=False)
+ self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
+
+ # An ElementTree without a root has nothing to delegate iter() to.
+ tree = ET.ElementTree(None)
+ self.assertRaises(AttributeError, tree.iter)
+
+ def test_corners(self):
+ # Small trees built step by step; iteration is re-checked after every
+ # structural change.
+ # single root, no subelements
+ a = ET.Element('a')
+ self.assertEqual(self._ilist(a), ['a'])
+
+ # one child
+ b = ET.SubElement(a, 'b')
+ self.assertEqual(self._ilist(a), ['a', 'b'])
+
+ # one child and one grandchild
+ c = ET.SubElement(b, 'c')
+ self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
+
+ # two children, only first with grandchild
+ d = ET.SubElement(a, 'd')
+ self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
+
+ # replace first child by second
+ a[0] = a[1]
+ del a[1]
+ self.assertEqual(self._ilist(a), ['a', 'd'])
+
+ def test_iter_by_tag(self):
+ # Filtering by tag name, plus the two spellings of "match everything".
+ doc = ET.XML('''
+ <document>
+ <house>
+ <room>bedroom1</room>
+ <room>bedroom2</room>
+ </house>
+ <shed>nothing here
+ </shed>
+ <house>
+ <room>bedroom8</room>
+ </house>
+ </document>''')
+
+ self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
+ self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
+
+ # make sure both tag=None and tag='*' return all tags
+ all_tags = ['document', 'house', 'room', 'room',
+ 'shed', 'house', 'room']
+ self.assertEqual(self._ilist(doc), all_tags)
+ self.assertEqual(self._ilist(doc, '*'), all_tags)
+
+
class TreeBuilderTest(unittest.TestCase):
sample1 = ('<!DOCTYPE html PUBLIC'
' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
-
+class XincludeTest(unittest.TestCase):
+ # unittest port of the former xinclude_default() doctest.
+ def test_xinclude_default(self):
+ # Expand the includes of "default.xml" in place and compare the
+ # serialized result (surrounding whitespace stripped) with the expected
+ # document text.
+ from xml.etree import ElementInclude
+ doc = xinclude_loader('default.xml')
+ ElementInclude.include(doc)
+ s = serialize(doc)
+ self.assertEqual(s.strip(), '''<document>
+ <p>Example.</p>
+ <root>
+ <element key="value">text</element>
+ <element>text</element>tail
+ <empty-element />
+</root>
+</document>''')
class XMLParserTest(unittest.TestCase):
sample1 = '<file><line>22</line></file>'
sample2 = ('<!DOCTYPE html PUBLIC'
'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
-class NoAcceleratorTest(unittest.TestCase):
- # Test that the C accelerator was not imported for pyET
- def test_correct_import_pyET(self):
- self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
- self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
-
-
class NamespaceParseTest(unittest.TestCase):
def test_find_with_namespace(self):
nsmap = {'h': 'hello', 'f': 'foo'}
self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
-
class ElementSlicingTest(unittest.TestCase):
def _elem_tags(self, elemlist):
return [e.tag for e in elemlist]
with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
ET.Element('a', attrib="I'm not a dict")
+# --------------------------------------------------------------------
+
+@unittest.skipUnless(pyET, 'only for the Python version')
+class NoAcceleratorTest(unittest.TestCase):
+ # Test that the C accelerator was not imported for pyET
+ def test_correct_import_pyET(self):
+ self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
+ self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
+
+
+class ElementPathFallbackTest(unittest.TestCase):
+ def test_fallback(self):
+ current_ElementPath = ET.ElementPath
+ ET.ElementPath = ET._SimpleElementPath()
+ elem = ET.XML(SAMPLE_XML)
+ self.assertEqual(elem.find('tag').tag, 'tag')
+ self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
+ self.assertEqual(elem.findtext('tag'), 'text')
+ self.assertIsNone(elem.findtext('tog'))
+ self.assertEqual(elem.findtext('tog', 'default'), 'default')
+ self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
+ self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag'])
+ self.assertEqual(summarize_list(elem.findall('.//tag')),
+ ['tag', 'tag', 'tag'])
+
+ self.assertIsNone(elem.find('section/tag'))
+ self.assertIsNone(elem.findtext('section/tag'))
+ self.assertEqual(summarize_list(elem.findall('section/tag')), [])
+
+ ET.ElementPath = current_ElementPath
# --------------------------------------------------------------------
self.checkwarnings.__exit__(*args)
-def test_main(module=pyET):
- from test import test_xml_etree
+def test_main(module=None):
+ # When invoked without a module, runs the Python ET tests by loading pyET.
+ # Otherwise, uses the given module as the ET.
+ if module is None:
+ global pyET
+ pyET = import_fresh_module('xml.etree.ElementTree',
+ blocked=['_elementtree'])
+ module = pyET
- # The same doctests are used for both the Python and the C implementations
- test_xml_etree.ET = module
+ global ET
+ ET = module
test_classes = [
ElementSlicingTest,
BasicElementTest,
StringIOTest,
ParseErrorTest,
+ XincludeTest,
ElementTreeTest,
- NamespaceParseTest,
+ ElementIterTest,
TreeBuilderTest,
- XMLParserTest,
- KeywordArgsTest]
- if module is pyET:
- # Run the tests specific to the Python implementation
- test_classes += [NoAcceleratorTest]
+ ]
+
+ # These tests will only run for the pure-Python version that doesn't import
+ # _elementtree. We can't use skipUnless here, because pyET is filled in only
+ # after the module is loaded.
+ if pyET:
+ test_classes.extend([
+ NoAcceleratorTest,
+ ElementPathFallbackTest,
+ ])
support.run_unittest(*test_classes)
# XXX the C module should give the same warnings as the Python module
with CleanContext(quiet=(module is not pyET)):
- support.run_doctest(test_xml_etree, verbosity=True)
+ support.run_doctest(sys.modules[__name__], verbosity=True)
if __name__ == '__main__':
test_main()
/* glue functions (see the init function for details) */
static PyObject* elementtree_parseerror_obj;
static PyObject* elementtree_deepcopy_obj;
-static PyObject* elementtree_iter_obj;
-static PyObject* elementtree_itertext_obj;
static PyObject* elementpath_obj;
/* helpers */
return list;
}
-static PyObject*
-element_iter(ElementObject* self, PyObject* args)
-{
- PyObject* result;
- PyObject* tag = Py_None;
- if (!PyArg_ParseTuple(args, "|O:iter", &tag))
- return NULL;
+static PyObject *
+create_elementiter(ElementObject *self, PyObject *tag, int gettext);
- if (!elementtree_iter_obj) {
- PyErr_SetString(
- PyExc_RuntimeError,
- "iter helper not found"
- );
- return NULL;
- }
- args = PyTuple_New(2);
- if (!args)
+static PyObject *
+element_iter(ElementObject *self, PyObject *args)
+{
+ PyObject* tag = Py_None;
+ if (!PyArg_ParseTuple(args, "|O:iter", &tag))
return NULL;
- Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
- Py_INCREF(tag); PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
-
- result = PyObject_CallObject(elementtree_iter_obj, args);
-
- Py_DECREF(args);
-
- return result;
+ return create_elementiter(self, tag, 0);
}
static PyObject*
element_itertext(ElementObject* self, PyObject* args)
{
- PyObject* result;
-
if (!PyArg_ParseTuple(args, ":itertext"))
return NULL;
- if (!elementtree_itertext_obj) {
- PyErr_SetString(
- PyExc_RuntimeError,
- "itertext helper not found"
- );
- return NULL;
- }
-
- args = PyTuple_New(1);
- if (!args)
- return NULL;
-
- Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
-
- result = PyObject_CallObject(elementtree_itertext_obj, args);
-
- Py_DECREF(args);
-
- return result;
+ return create_elementiter(self, Py_None, 1);
}
+
static PyObject*
element_getitem(PyObject* self_, Py_ssize_t index)
{
0, /* tp_free */
};
+/******************************* Element iterator ****************************/
+
+/* ElementIterObject represents the iteration state over an XML element in
+ * pre-order traversal. To keep track of which sub-element should be returned
+ * next, a stack of parents is maintained. This is a standard stack-based
+ * iterative pre-order traversal of a tree.
+ * The stack is managed using a singly-linked list starting at parent_stack.
+ * Each stack node contains the saved parent to which we should return after
+ * the current one is exhausted, and the next child to examine in that parent.
+ * The bottom of the stack is a sentinel node whose parent is NULL; it is
+ * allocated by create_elementiter() and released only by the deallocator.
+ */
+typedef struct ParentLocator_t {
+ /* Element whose children are being walked; a strong reference taken in
+  * parent_stack_push_new(). NULL only in the sentinel node. */
+ ElementObject *parent;
+ /* Index into parent->extra->children of the next child to visit. */
+ Py_ssize_t child_index;
+ /* Node below this one on the stack (NULL for the sentinel). */
+ struct ParentLocator_t *next;
+} ParentLocator;
+
+typedef struct {
+ PyObject_HEAD
+ /* Top of the parent stack; never NULL once the iterator is created. */
+ ParentLocator *parent_stack;
+ /* Element the iteration was started on (strong reference). */
+ ElementObject *root_element;
+ /* Tag to match, or Py_None to match every element; "*" is mapped to
+  * Py_None by create_elementiter() (strong reference). */
+ PyObject *sought_tag;
+ /* Non-zero once the root element has been pushed on the stack. */
+ int root_done;
+ /* Non-zero when serving itertext() (yield text/tail), zero for iter(). */
+ int gettext;
+} ElementIterObject;
+
+
+/* Deallocator for ElementIterObject: releases the whole parent stack
+ * (including the sentinel node) and the references to the sought tag and the
+ * root element, then frees the iterator itself.
+ */
+static void
+elementiter_dealloc(ElementIterObject *it)
+{
+    ParentLocator *p = it->parent_stack;
+
+    /* Untrack before touching any field that elementiter_traverse() visits:
+     * dropping references below can run arbitrary code, which may trigger a
+     * collection that must not see this half-destroyed object.
+     */
+    PyObject_GC_UnTrack(it);
+
+    while (p) {
+        ParentLocator *temp = p;
+        Py_XDECREF(p->parent);      /* NULL for the sentinel node */
+        p = p->next;
+        PyObject_Free(temp);
+    }
+
+    Py_XDECREF(it->sought_tag);
+    Py_XDECREF(it->root_element);
+
+    PyObject_GC_Del(it);
+}
+
+/* tp_traverse handler: report every object the iterator holds a reference
+ * to -- each parent saved on the stack, the root element and the sought tag.
+ */
+static int
+elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
+{
+    ParentLocator *node;
+
+    /* Py_VISIT skips NULL, so the sentinel node needs no special case. */
+    for (node = it->parent_stack; node != NULL; node = node->next) {
+        Py_VISIT(node->parent);
+    }
+
+    Py_VISIT(it->root_element);
+    Py_VISIT(it->sought_tag);
+    return 0;
+}
+
+/* Helper function for elementiter_next. Allocate a stack node for parent
+ * (taking a new reference to it), link it on top of stack and return the new
+ * top. Returns NULL, without setting an exception and without modifying
+ * stack, when the allocation fails.
+ */
+static ParentLocator *
+parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
+{
+    ParentLocator *top = PyObject_Malloc(sizeof(ParentLocator));
+
+    if (top == NULL)
+        return NULL;
+
+    top->parent = parent;
+    Py_INCREF(parent);
+    top->child_index = 0;
+    top->next = stack;
+    return top;
+}
+
+/* tp_iternext handler shared by Element.iter() and Element.itertext().
+ *
+ * Returns a new reference to the next item, or NULL with an exception set:
+ * StopIteration when the traversal is finished, MemoryError when a stack
+ * node cannot be allocated, or whatever a tag comparison / truth test raised.
+ */
+static PyObject *
+elementiter_next(ElementIterObject *it)
+{
+    /* Sub-element iterator.
+     *
+     * A short note on gettext: this function serves both the iter() and
+     * itertext() methods to avoid code duplication. However, there are a few
+     * small differences in the way these iterations work. Namely:
+     *   - itertext() only yields text from nodes that have it, and continues
+     *     iterating when a node doesn't have text (so it doesn't return any
+     *     node like iter())
+     *   - itertext() also has to handle tail, after finishing with all the
+     *     children of a node.
+     */
+
+    while (1) {
+        ElementObject *cur_parent;
+        Py_ssize_t child_index;
+        ParentLocator *node;
+        int rc;
+
+        /* Handle the case reached in the beginning and end of iteration,
+         * where only the sentinel is on the parent stack. The root_done flag
+         * tells whether we've just started iterating (root_done is 0), in
+         * which case the root is pushed and possibly returned. If root_done
+         * is 1 and we're here, the iterator is exhausted.
+         */
+        if (!it->parent_stack->parent) {
+            if (it->root_done) {
+                PyErr_SetNone(PyExc_StopIteration);
+                return NULL;
+            }
+
+            /* Push through a temporary so that a failed allocation does not
+             * overwrite (and thereby leak) the existing stack.
+             */
+            node = parent_stack_push_new(it->parent_stack, it->root_element);
+            if (!node) {
+                PyErr_NoMemory();
+                return NULL;
+            }
+            it->parent_stack = node;
+            it->root_done = 1;
+
+            rc = 1;
+            if (it->sought_tag != Py_None) {
+                rc = PyObject_RichCompareBool(it->root_element->tag,
+                                              it->sought_tag, Py_EQ);
+                if (rc < 0)
+                    return NULL;        /* the comparison raised */
+            }
+            if (rc) {
+                if (it->gettext) {
+                    PyObject *text = JOIN_OBJ(it->root_element->text);
+                    /* Own the text while its truth value is computed. */
+                    Py_INCREF(text);
+                    rc = PyObject_IsTrue(text);
+                    if (rc > 0)
+                        return text;
+                    Py_DECREF(text);
+                    if (rc < 0)
+                        return NULL;
+                } else {
+                    Py_INCREF(it->root_element);
+                    return (PyObject *)it->root_element;
+                }
+            }
+        }
+
+        /* See if there are children left to traverse in the current parent.
+         * If yes, visit the next child. If not, pop the stack and try again.
+         */
+        cur_parent = it->parent_stack->parent;
+        child_index = it->parent_stack->child_index;
+        if (cur_parent->extra && child_index < cur_parent->extra->length) {
+            ElementObject *child = (ElementObject *)
+                cur_parent->extra->children[child_index];
+
+            node = parent_stack_push_new(it->parent_stack, child);
+            if (!node) {
+                PyErr_NoMemory();
+                return NULL;
+            }
+            /* Only advance once the push has succeeded, so the child is not
+             * silently skipped if the caller retries after a MemoryError.
+             */
+            it->parent_stack->child_index++;
+            it->parent_stack = node;
+
+            if (it->gettext) {
+                PyObject *text = JOIN_OBJ(child->text);
+                Py_INCREF(text);
+                rc = PyObject_IsTrue(text);
+                if (rc > 0)
+                    return text;
+                Py_DECREF(text);
+                if (rc < 0)
+                    return NULL;
+            } else {
+                rc = 1;
+                if (it->sought_tag != Py_None) {
+                    rc = PyObject_RichCompareBool(child->tag,
+                                                  it->sought_tag, Py_EQ);
+                    if (rc < 0)
+                        return NULL;
+                }
+                if (rc) {
+                    Py_INCREF(child);
+                    return (PyObject *)child;
+                }
+            }
+        }
+        else {
+            ParentLocator *top = it->parent_stack;
+            PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail)
+                                         : Py_None;
+
+            /* tail is borrowed from cur_parent; take our own reference
+             * before the stack's reference to cur_parent is released.
+             */
+            Py_INCREF(tail);
+            it->parent_stack = top->next;
+            PyObject_Free(top);
+            Py_DECREF(cur_parent);
+
+            /* Note the extra condition on it->parent_stack->parent here;
+             * this is because itertext() is supposed to only return *inner*
+             * text, not text following the element it began iteration with.
+             */
+            if (!it->parent_stack->parent) {
+                Py_DECREF(tail);
+                continue;
+            }
+            rc = PyObject_IsTrue(tail);
+            if (rc > 0)
+                return tail;
+            Py_DECREF(tail);
+            if (rc < 0)
+                return NULL;
+        }
+    }
+
+    return NULL;
+}
+
+
+/* Type object for the iterator created by create_elementiter(). Instances
+ * are GC-enabled (Py_TPFLAGS_HAVE_GC with a tp_traverse handler, no
+ * tp_clear), are their own iterator (PyObject_SelfIter) and produce items
+ * through elementiter_next().
+ */
+static PyTypeObject ElementIter_Type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "_elementtree._element_iterator", /* tp_name */
+ sizeof(ElementIterObject), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)elementiter_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_reserved */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
+ 0, /* tp_doc */
+ (traverseproc)elementiter_traverse, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)elementiter_next, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+};
+
+
+/* Create the iterator object behind Element.iter() and Element.itertext().
+ *
+ *   self    - element to start from; the iterator keeps a reference to it.
+ *   tag     - tag to look for; Py_None or a str/bytes equal to "*" means
+ *             "every element".
+ *   gettext - non-zero to yield text chunks (itertext), zero to yield
+ *             elements (iter).
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *
+create_elementiter(ElementObject *self, PyObject *tag, int gettext)
+{
+    ElementIterObject *it;
+
+    it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
+    if (!it)
+        return NULL;
+
+    /* The bottom of the parent stack is a sentinel node with no parent. */
+    if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
+        PyObject_GC_Del(it);
+        /* PyObject_Malloc() does not set an exception by itself. */
+        return PyErr_NoMemory();
+    }
+
+    it->parent_stack->parent = NULL;
+    it->parent_stack->child_index = 0;
+    it->parent_stack->next = NULL;
+
+    /* Normalize the "*" wildcard (as str or bytes) to Py_None. */
+    if (PyUnicode_Check(tag) || PyBytes_Check(tag)) {
+        PyObject *star;
+        int is_star;
+
+        if (PyUnicode_Check(tag))
+            star = PyUnicode_FromString("*");
+        else
+            star = PyBytes_FromString("*");
+        if (!star)
+            goto error;
+
+        is_star = PyObject_RichCompareBool(tag, star, Py_EQ);
+        Py_DECREF(star);
+        if (is_star < 0)
+            goto error;
+        if (is_star)
+            tag = Py_None;
+    }
+
+    it->sought_tag = tag;
+    it->root_done = 0;
+    it->gettext = gettext;
+    it->root_element = self;
+
+    Py_INCREF(self);
+    Py_INCREF(tag);
+
+    PyObject_GC_Track(it);
+    return (PyObject *)it;
+
+error:
+    /* The iterator is not tracked yet and holds no object references. */
+    PyObject_Free(it->parent_stack);
+    PyObject_GC_Del(it);
+    return NULL;
+}
+
+
/* ==================================================================== */
/* the tree builder type */
PyMODINIT_FUNC
PyInit__elementtree(void)
{
- PyObject *m, *g, *temp;
- char* bootstrap;
+ PyObject *m, *temp;
/* Initialize object types */
if (PyType_Ready(&TreeBuilder_Type) < 0)
if (!m)
return NULL;
- /* The code below requires that the module gets already added
- to sys.modules. */
- PyDict_SetItemString(PyImport_GetModuleDict(),
- _elementtreemodule.m_name,
- m);
-
- /* python glue code */
-
- g = PyDict_New();
- if (!g)
- return NULL;
-
- PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
-
- bootstrap = (
- "def iter(node, tag=None):\n" /* helper */
- " if tag == '*':\n"
- " tag = None\n"
- " if tag is None or node.tag == tag:\n"
- " yield node\n"
- " for node in node:\n"
- " for node in iter(node, tag):\n"
- " yield node\n"
-
- "def itertext(node):\n" /* helper */
- " if node.text:\n"
- " yield node.text\n"
- " for e in node:\n"
- " for s in e.itertext():\n"
- " yield s\n"
- " if e.tail:\n"
- " yield e.tail\n"
-
- );
-
- if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
- return NULL;
-
if (!(temp = PyImport_ImportModule("copy")))
return NULL;
elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
return NULL;
- elementtree_iter_obj = PyDict_GetItemString(g, "iter");
- elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
-
/* link against pyexpat */
expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
if (expat_capi) {