import re
import warnings
import io
+import collections
+import collections.abc
import contextlib
from . import ElementPath
Returns an iterator providing (event, elem) pairs.
"""
+ # Use the internal, undocumented _parser argument for now; when the
+ # parser argument of iterparse is removed, this workaround can be dropped.
+ pullparser = XMLPullParser(events=events, _parser=parser)
+ def iterator():
+ try:
+ while True:
+ yield from pullparser.read_events()
+ # load event buffer
+ data = source.read(16 * 1024)
+ if not data:
+ break
+ pullparser.feed(data)
+ root = pullparser._close_and_return_root()
+ yield from pullparser.read_events()
+ it.root = root
+ finally:
+ if close_source:
+ source.close()
+
+ class IterParseIterator(collections.Iterator):
+ __next__ = iterator().__next__
+ it = IterParseIterator()
+ it.root = None
+ del iterator, IterParseIterator
+
close_source = False
if not hasattr(source, "read"):
source = open(source, "rb")
close_source = True
- try:
- return _IterParseIterator(source, events, parser, close_source)
- except:
- if close_source:
- source.close()
- raise
+
+ return it
class XMLPullParser:
# upon in user code. It will be removed in a future release.
# See http://bugs.python.org/issue17741 for more details.
- # _elementtree.c expects a list, not a deque
- self._events_queue = []
- self._index = 0
+ self._events_queue = collections.deque()
self._parser = _parser or XMLParser(target=TreeBuilder())
# wire up the parser for event reporting
if events is None:
retrieved from the iterator.
"""
events = self._events_queue
- while True:
- index = self._index
- try:
- event = events[self._index]
- # Avoid retaining references to past events
- events[self._index] = None
- except IndexError:
- break
- index += 1
- # Compact the list in a O(1) amortized fashion
- # As noted above, _elementree.c needs a list, not a deque
- if index * 2 >= len(events):
- events[:index] = []
- self._index = 0
- else:
- self._index = index
+ while events:
+ event = events.popleft()
if isinstance(event, Exception):
raise event
else:
yield event
-class _IterParseIterator:
-
- def __init__(self, source, events, parser, close_source=False):
- # Use the internal, undocumented _parser argument for now; When the
- # parser argument of iterparse is removed, this can be killed.
- self._parser = XMLPullParser(events=events, _parser=parser)
- self._file = source
- self._close_file = close_source
- self.root = self._root = None
-
- def __next__(self):
- try:
- while 1:
- for event in self._parser.read_events():
- return event
- if self._parser._parser is None:
- break
- # load event buffer
- data = self._file.read(16 * 1024)
- if data:
- self._parser.feed(data)
- else:
- self._root = self._parser._close_and_return_root()
- self.root = self._root
- except:
- if self._close_file:
- self._file.close()
- raise
- if self._close_file:
- self._file.close()
- raise StopIteration
-
- def __iter__(self):
- return self
-
-
def XML(text, parser=None):
"""Parse XML document from string constant.
Library
-------
+- Issue #25638: Optimized ElementTree.iterparse(); it is now 2x faster.
+
- Issue #25761: Improved detecting errors in broken pickle data.
- Issue #25717: Restore the previous behaviour of tolerating most fstat()
PyObject *element_factory;
/* element tracing */
- PyObject *events; /* list of events, or NULL if not collecting */
+ PyObject *events_append; /* the append method of the list of events, or NULL */
PyObject *start_event_obj; /* event objects (NULL to ignore) */
PyObject *end_event_obj;
PyObject *start_ns_event_obj;
}
t->index = 0;
- t->events = NULL;
+ t->events_append = NULL;
t->start_event_obj = t->end_event_obj = NULL;
t->start_ns_event_obj = t->end_ns_event_obj = NULL;
}
Py_CLEAR(self->start_ns_event_obj);
Py_CLEAR(self->end_event_obj);
Py_CLEAR(self->start_event_obj);
- Py_CLEAR(self->events);
+ Py_CLEAR(self->events_append);
Py_CLEAR(self->stack);
Py_CLEAR(self->data);
Py_CLEAR(self->last);
PyObject *node)
{
if (action != NULL) {
- PyObject *res = PyTuple_Pack(2, action, node);
- if (res == NULL)
+ PyObject *res;
+ PyObject *event = PyTuple_Pack(2, action, node);
+ if (event == NULL)
return -1;
- if (PyList_Append(self->events, res) < 0) {
- Py_DECREF(res);
+ res = PyObject_CallFunctionObjArgs(self->events_append, event, NULL);
+ Py_DECREF(event);
+ if (res == NULL)
return -1;
- }
Py_DECREF(res);
}
return 0;
if (PyErr_Occurred())
return;
- if (!target->events || !target->start_ns_event_obj)
+ if (!target->events_append || !target->start_ns_event_obj)
return;
if (!uri)
if (PyErr_Occurred())
return;
- if (!target->events)
+ if (!target->events_append)
return;
treebuilder_append_event(target, target->end_ns_event_obj, Py_None);
/*[clinic input]
_elementtree.XMLParser._setevents
- events_queue: object(subclass_of='&PyList_Type')
+ events_queue: object
events_to_report: object = None
/
_elementtree_XMLParser__setevents_impl(XMLParserObject *self,
PyObject *events_queue,
PyObject *events_to_report)
-/*[clinic end generated code: output=1440092922b13ed1 input=59db9742910c6174]*/
+/*[clinic end generated code: output=1440092922b13ed1 input=abf90830a1c3b0fc]*/
{
/* activate element event reporting */
Py_ssize_t i, seqlen;
TreeBuilderObject *target;
- PyObject *events_seq;
+ PyObject *events_append, *events_seq;
if (!TreeBuilder_CheckExact(self->target)) {
PyErr_SetString(
target = (TreeBuilderObject*) self->target;
- Py_INCREF(events_queue);
- Py_XDECREF(target->events);
- target->events = events_queue;
+ events_append = PyObject_GetAttrString(events_queue, "append");
+ if (events_append == NULL)
+ return NULL;
+ Py_XDECREF(target->events_append);
+ target->events_append = events_append;
/* clear out existing events */
Py_CLEAR(target->start_event_obj);
PyObject *events_queue;
PyObject *events_to_report = Py_None;
- if (!PyArg_ParseTuple(args, "O!|O:_setevents",
- &PyList_Type, &events_queue, &events_to_report))
+ if (!PyArg_UnpackTuple(args, "_setevents",
+ 1, 2,
+ &events_queue, &events_to_report))
goto exit;
return_value = _elementtree_XMLParser__setevents_impl(self, events_queue, events_to_report);
exit:
return return_value;
}
-/*[clinic end generated code: output=25b8bf7e7f2151ca input=a9049054013a1b77]*/
+/*[clinic end generated code: output=19d94e2d2726d3aa input=a9049054013a1b77]*/