Replace the iter/itertext methods of Element in _elementtree with true C implementati...

author Eli Bendersky <eliben@gmail.com>

Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)

committer Eli Bendersky <eliben@gmail.com>

Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)
author Eli Bendersky <eliben@gmail.com>
Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)
committer Eli Bendersky <eliben@gmail.com>
Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py

index 49a5633a830512d7de5f2b8d491ced1181761aad..bee63292568ea29dd21c1758701965755d811f02 100644 (file)
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -23,7 +23,8 @@ import weakref
  from test import support
  from test.support import findfile, import_fresh_module, gc_collect
  
-pyET = import_fresh_module('xml.etree.ElementTree', blocked=['_elementtree'])
+pyET = None
+ET = None
  
  SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
  try:
@@ -209,10 +210,8 @@ def interface():
  
      These methods return an iterable. See bug 6472.
  
-    >>> check_method(element.iter("tag").__next__)
      >>> check_method(element.iterfind("tag").__next__)
      >>> check_method(element.iterfind("*").__next__)
-    >>> check_method(tree.iter("tag").__next__)
      >>> check_method(tree.iterfind("tag").__next__)
      >>> check_method(tree.iterfind("*").__next__)
  
@@ -291,42 +290,6 @@ def cdata():
      '<tag>hello</tag>'
      """
  
-# Only with Python implementation
-def simplefind():
-    """
-    Test find methods using the elementpath fallback.
-
-    >>> ElementTree = pyET
-
-    >>> CurrentElementPath = ElementTree.ElementPath
-    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
-    >>> elem = ElementTree.XML(SAMPLE_XML)
-    >>> elem.find("tag").tag
-    'tag'
-    >>> ElementTree.ElementTree(elem).find("tag").tag
-    'tag'
-    >>> elem.findtext("tag")
-    'text'
-    >>> elem.findtext("tog")
-    >>> elem.findtext("tog", "default")
-    'default'
-    >>> ElementTree.ElementTree(elem).findtext("tag")
-    'text'
-    >>> summarize_list(elem.findall("tag"))
-    ['tag', 'tag']
-    >>> summarize_list(elem.findall(".//tag"))
-    ['tag', 'tag', 'tag']
-
-    Path syntax doesn't work in this case.
-
-    >>> elem.find("section/tag")
-    >>> elem.findtext("section/tag")
-    >>> summarize_list(elem.findall("section/tag"))
-    []
-
-    >>> ElementTree.ElementPath = CurrentElementPath
-    """
-
  def find():
      """
      Test find methods (including xpath syntax).
@@ -1002,36 +965,6 @@ def methods():
      '1 < 2\n'
      """
  
-def iterators():
-    """
-    Test iterators.
-
-    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
-    >>> summarize_list(e.iter())
-    ['html', 'body', 'i']
-    >>> summarize_list(e.find("body").iter())
-    ['body', 'i']
-    >>> summarize(next(e.iter()))
-    'html'
-    >>> "".join(e.itertext())
-    'this is a paragraph...'
-    >>> "".join(e.find("body").itertext())
-    'this is a paragraph.'
-    >>> next(e.itertext())
-    'this is a '
-
-    Method iterparse should return an iterator. See bug 6472.
-
-    >>> sourcefile = serialize(e, to_string=False)
-    >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS
-    ('end', <Element 'i' at 0x...>)
-
-    >>> tree = ET.ElementTree(None)
-    >>> tree.iter()
-    Traceback (most recent call last):
-    AttributeError: 'NoneType' object has no attribute 'iter'
-    """
-
  ENTITY_XML = """\
  <!DOCTYPE points [
  <!ENTITY % user-entities SYSTEM 'user-entities.xml'>
@@ -1339,6 +1272,7 @@ XINCLUDE["default.xml"] = """\
  </document>
  """.format(html.escape(SIMPLE_XMLFILE, True))
  
+
  def xinclude_loader(href, parse="xml", encoding=None):
      try:
          data = XINCLUDE[href]
@@ -1411,22 +1345,6 @@ def xinclude():
      >>> # print(serialize(document)) # C5
      """
  
-def xinclude_default():
-    """
-    >>> from xml.etree import ElementInclude
-
-    >>> document = xinclude_loader("default.xml")
-    >>> ElementInclude.include(document)
-    >>> print(serialize(document)) # default
-    <document>
-      <p>Example.</p>
-      <root>
-       <element key="value">text</element>
-       <element>text</element>tail
-       <empty-element />
-    </root>
-    </document>
-    """
  
  #
  # badly formatted xi:include tags
@@ -1917,9 +1835,8 @@ class ElementTreeTest(unittest.TestCase):
          self.assertIsInstance(ET.QName, type)
          self.assertIsInstance(ET.ElementTree, type)
          self.assertIsInstance(ET.Element, type)
-        # XXX issue 14128 with C ElementTree
-        # self.assertIsInstance(ET.TreeBuilder, type)
-        # self.assertIsInstance(ET.XMLParser, type)
+        self.assertIsInstance(ET.TreeBuilder, type)
+        self.assertIsInstance(ET.XMLParser, type)
  
      def test_Element_subclass_trivial(self):
          class MyElement(ET.Element):
@@ -1953,6 +1870,73 @@ class ElementTreeTest(unittest.TestCase):
          self.assertEqual(mye.newmethod(), 'joe')
  
  
+class ElementIterTest(unittest.TestCase):
+    def _ilist(self, elem, tag=None):
+        return summarize_list(elem.iter(tag))
+
+    def test_basic(self):
+        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
+        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
+        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
+        self.assertEqual(next(doc.iter()).tag, 'html')
+        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
+        self.assertEqual(''.join(doc.find('body').itertext()),
+            'this is a paragraph.')
+        self.assertEqual(next(doc.itertext()), 'this is a ')
+
+        # iterparse should return an iterator
+        sourcefile = serialize(doc, to_string=False)
+        self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')
+
+        tree = ET.ElementTree(None)
+        self.assertRaises(AttributeError, tree.iter)
+
+    def test_corners(self):
+        # single root, no subelements
+        a = ET.Element('a')
+        self.assertEqual(self._ilist(a), ['a'])
+
+        # one child
+        b = ET.SubElement(a, 'b')
+        self.assertEqual(self._ilist(a), ['a', 'b'])
+
+        # one child and one grandchild
+        c = ET.SubElement(b, 'c')
+        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])
+
+        # two children, only first with grandchild
+        d = ET.SubElement(a, 'd')
+        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])
+
+        # replace first child by second
+        a[0] = a[1]
+        del a[1]
+        self.assertEqual(self._ilist(a), ['a', 'd'])
+
+    def test_iter_by_tag(self):
+        doc = ET.XML('''
+            <document>
+                <house>
+                    <room>bedroom1</room>
+                    <room>bedroom2</room>
+                </house>
+                <shed>nothing here
+                </shed>
+                <house>
+                    <room>bedroom8</room>
+                </house>
+            </document>''')
+
+        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
+        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)
+
+        # make sure both tag=None and tag='*' return all tags
+        all_tags = ['document', 'house', 'room', 'room',
+                    'shed', 'house', 'room']
+        self.assertEqual(self._ilist(doc), all_tags)
+        self.assertEqual(self._ilist(doc, '*'), all_tags)
+
+
  class TreeBuilderTest(unittest.TestCase):
      sample1 = ('<!DOCTYPE html PUBLIC'
          ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
@@ -2026,7 +2010,20 @@ class TreeBuilderTest(unittest.TestCase):
              ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
               'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
  
-
+class XincludeTest(unittest.TestCase):
+    def test_xinclude_default(self):
+        from xml.etree import ElementInclude
+        doc = xinclude_loader('default.xml')
+        ElementInclude.include(doc)
+        s = serialize(doc)
+        self.assertEqual(s.strip(), '''<document>
+  <p>Example.</p>
+  <root>
+   <element key="value">text</element>
+   <element>text</element>tail
+   <empty-element />
+</root>
+</document>''')
  class XMLParserTest(unittest.TestCase):
      sample1 = '<file><line>22</line></file>'
      sample2 = ('<!DOCTYPE html PUBLIC'
@@ -2073,13 +2070,6 @@ class XMLParserTest(unittest.TestCase):
               'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
  
  
-class NoAcceleratorTest(unittest.TestCase):
-    # Test that the C accelerator was not imported for pyET
-    def test_correct_import_pyET(self):
-        self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
-        self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
-
-
  class NamespaceParseTest(unittest.TestCase):
      def test_find_with_namespace(self):
          nsmap = {'h': 'hello', 'f': 'foo'}
@@ -2090,7 +2080,6 @@ class NamespaceParseTest(unittest.TestCase):
          self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1)
  
  
-
  class ElementSlicingTest(unittest.TestCase):
      def _elem_tags(self, elemlist):
          return [e.tag for e in elemlist]
@@ -2232,6 +2221,36 @@ class KeywordArgsTest(unittest.TestCase):
          with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
              ET.Element('a', attrib="I'm not a dict")
  
+# --------------------------------------------------------------------
+
+@unittest.skipUnless(pyET, 'only for the Python version')
+class NoAcceleratorTest(unittest.TestCase):
+    # Test that the C accelerator was not imported for pyET
+    def test_correct_import_pyET(self):
+        self.assertEqual(pyET.Element.__module__, 'xml.etree.ElementTree')
+        self.assertEqual(pyET.SubElement.__module__, 'xml.etree.ElementTree')
+
+
+class ElementPathFallbackTest(unittest.TestCase):
+    def test_fallback(self):
+        current_ElementPath = ET.ElementPath
+        ET.ElementPath = ET._SimpleElementPath()
+        elem = ET.XML(SAMPLE_XML)
+        self.assertEqual(elem.find('tag').tag, 'tag')
+        self.assertEqual(ET.ElementTree(elem).find('tag').tag, 'tag')
+        self.assertEqual(elem.findtext('tag'), 'text')
+        self.assertIsNone(elem.findtext('tog'))
+        self.assertEqual(elem.findtext('tog', 'default'), 'default')
+        self.assertEqual(ET.ElementTree(elem).findtext('tag'), 'text')
+        self.assertEqual(summarize_list(elem.findall('tag')), ['tag', 'tag'])
+        self.assertEqual(summarize_list(elem.findall('.//tag')),
+            ['tag', 'tag', 'tag'])
+
+        self.assertIsNone(elem.find('section/tag'))
+        self.assertIsNone(elem.findtext('section/tag'))
+        self.assertEqual(summarize_list(elem.findall('section/tag')), [])
+
+        ET.ElementPath = current_ElementPath
  
  # --------------------------------------------------------------------
  
@@ -2276,31 +2295,43 @@ class CleanContext(object):
          self.checkwarnings.__exit__(*args)
  
  
-def test_main(module=pyET):
-    from test import test_xml_etree
+def test_main(module=None):
+    # When invoked without a module, runs the Python ET tests by loading pyET.
+    # Otherwise, uses the given module as the ET.
+    if module is None:
+        global pyET
+        pyET = import_fresh_module('xml.etree.ElementTree',
+                                   blocked=['_elementtree'])
+        module = pyET
  
-    # The same doctests are used for both the Python and the C implementations
-    test_xml_etree.ET = module
+    global ET
+    ET = module
  
      test_classes = [
          ElementSlicingTest,
          BasicElementTest,
          StringIOTest,
          ParseErrorTest,
+        XincludeTest,
          ElementTreeTest,
-        NamespaceParseTest,
+        ElementIterTest,
          TreeBuilderTest,
-        XMLParserTest,
-        KeywordArgsTest]
-    if module is pyET:
-        # Run the tests specific to the Python implementation
-        test_classes += [NoAcceleratorTest]
+        ]
+
+    # These tests will only run for the pure-Python version that doesn't import
+    # _elementtree. We can't use skipUnless here, because pyET is filled in only
+    # after the module is loaded.
+    if pyET:
+        test_classes.extend([
+            NoAcceleratorTest,
+            ElementPathFallbackTest,
+            ])
  
      support.run_unittest(*test_classes)
  
      # XXX the C module should give the same warnings as the Python module
      with CleanContext(quiet=(module is not pyET)):
-        support.run_doctest(test_xml_etree, verbosity=True)
+        support.run_doctest(sys.modules[__name__], verbosity=True)
  
  if __name__ == '__main__':
      test_main()
diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py

index 10416d2591f045a0f51b81fee43fe73dcb6ad193..142a22ff980c93ff4fdf637f4a2ce97aa342c834 100644 (file)
--- a/Lib/test/test_xml_etree_c.py
+++ b/Lib/test/test_xml_etree_c.py
@@ -8,31 +8,6 @@ cET = import_fresh_module('xml.etree.ElementTree', fresh=['_elementtree'])
  cET_alias = import_fresh_module('xml.etree.cElementTree', fresh=['_elementtree', 'xml.etree'])
  
  
-# cElementTree specific tests
-
-def sanity():
-    r"""
-    Import sanity.
-
-    Issue #6697.
-
-    >>> cElementTree = cET
-    >>> e = cElementTree.Element('a')
-    >>> getattr(e, '\uD800')           # doctest: +ELLIPSIS
-    Traceback (most recent call last):
-      ...
-    UnicodeEncodeError: ...
-
-    >>> p = cElementTree.XMLParser()
-    >>> p.version.split()[0]
-    'Expat'
-    >>> getattr(p, '\uD800')
-    Traceback (most recent call last):
-     ...
-    AttributeError: 'XMLParser' object has no attribute '\ud800'
-    """
-
-
  class MiscTests(unittest.TestCase):
      # Issue #8651.
      @support.bigmemtest(size=support._2G + 100, memuse=1)
@@ -46,6 +21,7 @@ class MiscTests(unittest.TestCase):
          finally:
              data = None
  
+
  @unittest.skipUnless(cET, 'requires _elementtree')
  class TestAliasWorking(unittest.TestCase):
      # Test that the cET alias module is alive
@@ -53,6 +29,7 @@ class TestAliasWorking(unittest.TestCase):
          e = cET_alias.Element('foo')
          self.assertEqual(e.tag, 'foo')
  
+
  @unittest.skipUnless(cET, 'requires _elementtree')
  class TestAcceleratorImported(unittest.TestCase):
      # Test that the C accelerator was imported, as expected
@@ -67,7 +44,6 @@ def test_main():
      from test import test_xml_etree, test_xml_etree_c
  
      # Run the tests specific to the C implementation
-    support.run_doctest(test_xml_etree_c, verbosity=True)
      support.run_unittest(
          MiscTests,
          TestAliasWorking,
diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py

index e068fc2443d73a16febb090a20cee604ddf709f2..47766253e545366ea42ad208a246f4fe602d2c6c 100644 (file)
--- a/Lib/xml/etree/ElementTree.py
+++ b/Lib/xml/etree/ElementTree.py
@@ -916,11 +916,7 @@ def _namespaces(elem, default_namespace=None):
              _raise_serialization_error(qname)
  
      # populate qname and namespaces table
-    try:
-        iterate = elem.iter
-    except AttributeError:
-        iterate = elem.getiterator # cET compatibility
-    for elem in iterate():
+    for elem in elem.iter():
          tag = elem.tag
          if isinstance(tag, QName):
              if tag.text not in qnames:
diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c

index 103e778eea74e7c870333747cee61848127eaff0..f0b5a3fa242dc49733f26e3c290e9045b9b28f11 100644 (file)
--- a/Modules/_elementtree.c
+++ b/Modules/_elementtree.c
@@ -103,8 +103,6 @@ do { memory -= size; printf("%8d - %s\n", memory, comment); } while (0)
  /* glue functions (see the init function for details) */
  static PyObject* elementtree_parseerror_obj;
  static PyObject* elementtree_deepcopy_obj;
-static PyObject* elementtree_iter_obj;
-static PyObject* elementtree_itertext_obj;
  static PyObject* elementpath_obj;
  
  /* helpers */
@@ -1109,67 +1107,32 @@ element_getchildren(ElementObject* self, PyObject* args)
      return list;
  }
  
-static PyObject*
-element_iter(ElementObject* self, PyObject* args)
-{
-    PyObject* result;
  
-    PyObject* tag = Py_None;
-    if (!PyArg_ParseTuple(args, "|O:iter", &tag))
-        return NULL;
+static PyObject *
+create_elementiter(ElementObject *self, PyObject *tag, int gettext);
  
-    if (!elementtree_iter_obj) {
-        PyErr_SetString(
-            PyExc_RuntimeError,
-            "iter helper not found"
-            );
-        return NULL;
-    }
  
-    args = PyTuple_New(2);
-    if (!args)
+static PyObject *
+element_iter(ElementObject *self, PyObject *args)
+{
+    PyObject* tag = Py_None;
+    if (!PyArg_ParseTuple(args, "|O:iter", &tag))
          return NULL;
  
-    Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
-    Py_INCREF(tag);  PyTuple_SET_ITEM(args, 1, (PyObject*) tag);
-
-    result = PyObject_CallObject(elementtree_iter_obj, args);
-
-    Py_DECREF(args);
-
-    return result;
+    return create_elementiter(self, tag, 0);
  }
  
  
  static PyObject*
  element_itertext(ElementObject* self, PyObject* args)
  {
-    PyObject* result;
-
      if (!PyArg_ParseTuple(args, ":itertext"))
          return NULL;
  
-    if (!elementtree_itertext_obj) {
-        PyErr_SetString(
-            PyExc_RuntimeError,
-            "itertext helper not found"
-            );
-        return NULL;
-    }
-
-    args = PyTuple_New(1);
-    if (!args)
-        return NULL;
-
-    Py_INCREF(self); PyTuple_SET_ITEM(args, 0, (PyObject*) self);
-
-    result = PyObject_CallObject(elementtree_itertext_obj, args);
-
-    Py_DECREF(args);
-
-    return result;
+    return create_elementiter(self, Py_None, 1);
  }
  
+
  static PyObject*
  element_getitem(PyObject* self_, Py_ssize_t index)
  {
@@ -1790,6 +1753,267 @@ static PyTypeObject Element_Type = {
      0,                                              /* tp_free */
  };
  
+/******************************* Element iterator ****************************/
+
+/* ElementIterObject represents the iteration state over an XML element in
+ * pre-order traversal. To keep track of which sub-element should be returned
+ * next, a stack of parents is maintained. This is a standard stack-based
+ * iterative pre-order traversal of a tree.
+ * The stack is managed using a single-linked list starting at parent_stack.
+ * Each stack node contains the saved parent to which we should return after
+ * the current one is exhausted, and the next child to examine in that parent.
+ */
+typedef struct ParentLocator_t {
+    ElementObject *parent;
+    Py_ssize_t child_index;
+    struct ParentLocator_t *next;
+} ParentLocator;
+
+typedef struct {
+    PyObject_HEAD
+    ParentLocator *parent_stack;
+    ElementObject *root_element;
+    PyObject *sought_tag;
+    int root_done;
+    int gettext;
+} ElementIterObject;
+
+
+static void
+elementiter_dealloc(ElementIterObject *it)
+{
+    ParentLocator *p = it->parent_stack;
+    while (p) {
+        ParentLocator *temp = p;
+        Py_XDECREF(p->parent);
+        p = p->next;
+        PyObject_Free(temp);
+    }
+
+    Py_XDECREF(it->sought_tag);
+    Py_XDECREF(it->root_element);
+
+    PyObject_GC_UnTrack(it);
+    PyObject_GC_Del(it);
+}
+
+static int
+elementiter_traverse(ElementIterObject *it, visitproc visit, void *arg)
+{
+    ParentLocator *p = it->parent_stack;
+    while (p) {
+        Py_VISIT(p->parent);
+        p = p->next;
+    }
+
+    Py_VISIT(it->root_element);
+    Py_VISIT(it->sought_tag);
+    return 0;
+}
+
+/* Helper function for elementiter_next. Add a new parent to the parent stack.
+ */
+static ParentLocator *
+parent_stack_push_new(ParentLocator *stack, ElementObject *parent)
+{
+    ParentLocator *new_node = PyObject_Malloc(sizeof(ParentLocator));
+    if (new_node) {
+        new_node->parent = parent;
+        Py_INCREF(parent);
+        new_node->child_index = 0;
+        new_node->next = stack;
+    }
+    return new_node;
+}
+
+static PyObject *
+elementiter_next(ElementIterObject *it)
+{
+    /* Sub-element iterator.
+     * 
+     * A short note on gettext: this function serves both the iter() and
+     * itertext() methods to avoid code duplication. However, there are a few
+     * small differences in the way these iterations work. Namely:
+     *   - itertext() only yields text from nodes that have it, and continues
+     *     iterating when a node doesn't have text (so it doesn't return any
+     *     node like iter())
+     *   - itertext() also has to handle tail, after finishing with all the
+     *     children of a node.
+     */
+
+    while (1) {
+        /* Handle the case reached in the beginning and end of iteration, where
+         * the parent stack is empty. The root_done flag gives us indication
+         * whether we've just started iterating (so root_done is 0), in which
+         * case the root is returned. If root_done is 1 and we're here, the
+         * iterator is exhausted.
+         */
+        if (!it->parent_stack->parent) {
+            if (it->root_done) {
+                PyErr_SetNone(PyExc_StopIteration);
+                return NULL;
+            } else {
+                it->parent_stack = parent_stack_push_new(it->parent_stack,
+                                                         it->root_element);
+                if (!it->parent_stack) {
+                    PyErr_NoMemory();
+                    return NULL;
+                }
+
+                it->root_done = 1;
+                if (it->sought_tag == Py_None ||
+                    PyObject_RichCompareBool(it->root_element->tag,
+                                             it->sought_tag, Py_EQ) == 1) {
+                    if (it->gettext) {
+                        PyObject *text = JOIN_OBJ(it->root_element->text);
+                        if (PyObject_IsTrue(text)) {
+                            Py_INCREF(text);
+                            return text;
+                        }
+                    } else {
+                        Py_INCREF(it->root_element);
+                        return (PyObject *)it->root_element;
+                    }
+                }
+            }
+        }
+
+        /* See if there are children left to traverse in the current parent. If
+         * yes, visit the next child. If not, pop the stack and try again.
+         */
+        ElementObject *cur_parent = it->parent_stack->parent;
+        Py_ssize_t child_index = it->parent_stack->child_index;
+        if (cur_parent->extra && child_index < cur_parent->extra->length) {
+            ElementObject *child = (ElementObject *)
+                cur_parent->extra->children[child_index];
+            it->parent_stack->child_index++;
+            it->parent_stack = parent_stack_push_new(it->parent_stack,
+                                                     child);
+            if (!it->parent_stack) {
+                PyErr_NoMemory();
+                return NULL;
+            }
+
+            if (it->gettext) {
+                PyObject *text = JOIN_OBJ(child->text);
+                if (PyObject_IsTrue(text)) {
+                    Py_INCREF(text);
+                    return text;
+                }
+            } else if (it->sought_tag == Py_None ||
+                PyObject_RichCompareBool(child->tag,
+                                         it->sought_tag, Py_EQ) == 1) {
+                Py_INCREF(child);
+                return (PyObject *)child;
+            }
+            else
+                continue;
+        }
+        else {
+            PyObject *tail = it->gettext ? JOIN_OBJ(cur_parent->tail) : Py_None;            
+            ParentLocator *next = it->parent_stack->next;
+            Py_XDECREF(it->parent_stack->parent);
+            PyObject_Free(it->parent_stack);
+            it->parent_stack = next;
+
+            /* Note that extra condition on it->parent_stack->parent here;
+             * this is because itertext() is supposed to only return *inner*
+             * text, not text following the element it began iteration with.
+             */
+            if (it->parent_stack->parent && PyObject_IsTrue(tail)) {
+                Py_INCREF(tail);
+                return tail;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+
+static PyTypeObject ElementIter_Type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_elementtree._element_iterator",           /* tp_name */
+    sizeof(ElementIterObject),                  /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    /* methods */
+    (destructor)elementiter_dealloc,            /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_reserved */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    0,                                          /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
+    0,                                          /* tp_doc */
+    (traverseproc)elementiter_traverse,         /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    PyObject_SelfIter,                          /* tp_iter */
+    (iternextfunc)elementiter_next,             /* tp_iternext */
+    0,                                          /* tp_methods */
+    0,                                          /* tp_members */
+    0,                                          /* tp_getset */
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+};
+
+
+static PyObject *
+create_elementiter(ElementObject *self, PyObject *tag, int gettext)
+{
+    ElementIterObject *it;
+    PyObject *star = NULL;
+
+    it = PyObject_GC_New(ElementIterObject, &ElementIter_Type);
+    if (!it)
+        return NULL;
+    if (!(it->parent_stack = PyObject_Malloc(sizeof(ParentLocator)))) {
+        PyObject_GC_Del(it);
+        return NULL;
+    }
+
+    it->parent_stack->parent = NULL;
+    it->parent_stack->child_index = 0;
+    it->parent_stack->next = NULL;
+
+    if (PyUnicode_Check(tag))
+        star = PyUnicode_FromString("*");
+    else if (PyBytes_Check(tag))
+        star = PyBytes_FromString("*");
+
+    if (star && PyObject_RichCompareBool(tag, star, Py_EQ) == 1)
+        tag = Py_None;
+
+    Py_XDECREF(star);
+    it->sought_tag = tag;
+    it->root_done = 0;
+    it->gettext = gettext;
+    it->root_element = self;
+
+    Py_INCREF(self);
+    Py_INCREF(tag);
+
+    PyObject_GC_Track(it);
+    return (PyObject *)it;
+}
+
+
  /* ==================================================================== */
  /* the tree builder type */
  
@@ -3238,8 +3462,7 @@ static struct PyModuleDef _elementtreemodule = {
  PyMODINIT_FUNC
  PyInit__elementtree(void)
  {
-    PyObject *m, *g, *temp;
-    char* bootstrap;
+    PyObject *m, *temp;
  
      /* Initialize object types */
      if (PyType_Ready(&TreeBuilder_Type) < 0)
@@ -3255,44 +3478,6 @@ PyInit__elementtree(void)
      if (!m)
          return NULL;
  
-    /* The code below requires that the module gets already added
-       to sys.modules. */
-    PyDict_SetItemString(PyImport_GetModuleDict(),
-                         _elementtreemodule.m_name,
-                         m);
-
-    /* python glue code */
-
-    g = PyDict_New();
-    if (!g)
-        return NULL;
-
-    PyDict_SetItemString(g, "__builtins__", PyEval_GetBuiltins());
-
-    bootstrap = (
-        "def iter(node, tag=None):\n" /* helper */
-        "  if tag == '*':\n"
-        "    tag = None\n"
-        "  if tag is None or node.tag == tag:\n"
-        "    yield node\n"
-        "  for node in node:\n"
-        "    for node in iter(node, tag):\n"
-        "      yield node\n"
-
-        "def itertext(node):\n" /* helper */
-        "  if node.text:\n"
-        "    yield node.text\n"
-        "  for e in node:\n"
-        "    for s in e.itertext():\n"
-        "      yield s\n"
-        "    if e.tail:\n"
-        "      yield e.tail\n"
-
-       );
-
-    if (!PyRun_String(bootstrap, Py_file_input, g, NULL))
-        return NULL;
-
      if (!(temp = PyImport_ImportModule("copy")))
          return NULL;
      elementtree_deepcopy_obj = PyObject_GetAttrString(temp, "deepcopy");
@@ -3301,9 +3486,6 @@ PyInit__elementtree(void)
      if (!(elementpath_obj = PyImport_ImportModule("xml.etree.ElementPath")))
          return NULL;
  
-    elementtree_iter_obj = PyDict_GetItemString(g, "iter");
-    elementtree_itertext_obj = PyDict_GetItemString(g, "itertext");
-
      /* link against pyexpat */
      expat_capi = PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);
      if (expat_capi) {
author	Eli Bendersky <eliben@gmail.com>
	Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)
committer	Eli Bendersky <eliben@gmail.com>
	Fri, 15 Jun 2012 04:42:50 +0000 (07:42 +0300)
Lib/test/test_xml_etree.py		patch \| blob \| history
Lib/test/test_xml_etree_c.py		patch \| blob \| history
Lib/xml/etree/ElementTree.py		patch \| blob \| history
Modules/_elementtree.c		patch \| blob \| history