bpo-31648: Improve ElementPath (#3835)
author scoder <stefan_ml@behnel.de>
Sat, 30 Sep 2017 13:35:21 +0000 (15:35 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 30 Sep 2017 13:35:21 +0000 (16:35 +0300)
* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".

Doc/library/xml.etree.elementtree.rst
Doc/whatsnew/3.7.rst
Lib/test/test_xml_etree.py
Lib/xml/etree/ElementPath.py
Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst [new file with mode: 0644]

index 7d814ad406eb1b53654d4535e2747aa9706c0eb3..61808596a555323151038b56bd41481c144b377f 100644 (file)
@@ -437,6 +437,11 @@ Supported XPath syntax
 | ``[tag]``             | Selects all elements that have a child named         |
 |                       | ``tag``.  Only immediate children are supported.     |
 +-----------------------+------------------------------------------------------+
+| ``[.='text']``        | Selects all elements whose complete text content,    |
+|                       | including descendants, equals the given ``text``.    |
+|                       |                                                      |
+|                       | .. versionadded:: 3.7                                |
++-----------------------+------------------------------------------------------+
 | ``[tag='text']``      | Selects all elements that have a child named         |
 |                       | ``tag`` whose complete text content, including       |
 |                       | descendants, equals the given ``text``.              |
index a474e767529f46740d6ed6ead299c6e59272c5c3..845ed643f97b5bf4ab2cdbf4d0aaf36879d5c174 100644 (file)
@@ -281,6 +281,14 @@ Function :func:`~uu.encode` now accepts an optional *backtick*
 keyword argument.  When it's true, zeros are represented by ``'`'``
 instead of spaces.  (Contributed by Xiang Zhang in :issue:`30103`.)
 
+xml.etree
+---------
+
+:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
+methods can now compare text of the current node with ``[. = "text"]``,
+not only text in children.  Predicates also allow adding spaces for
+better readability.  (Contributed by Stefan Behnel in :issue:`31648`.)
+
 zipapp
 ------
 
index 661ad8b9d4dfeff1fdcc06e95a6fde1c22501d56..02812f32bc9ccf9c66fdd2c7cfdcf91393463845 100644 (file)
@@ -2237,6 +2237,39 @@ class ElementFindTest(unittest.TestCase):
             ['tag'] * 2)
         self.assertEqual(e.findall('section//'), e.findall('section//*'))
 
+        self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+            ['section'])
+        self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
+            ['section'])
+
+        self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+                         ['tag'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
+                         [])
+        self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
+                         [])
+
+        # duplicate section => 2x tag matches
+        e[1] = e[2]
+        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+                         ['section', 'section'])
+        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+                         ['tag', 'tag'])
+
     def test_test_find_with_ns(self):
         e = ET.XML(SAMPLE_XML_NS)
         self.assertEqual(summarize_list(e.findall('tag')), [])
index 361f6d54fa554043c7b70b5fef8cf7b6f08a555c..c9d6ef345b91126d7a64b45a9b482c1190a4b59a 100644 (file)
@@ -157,6 +157,9 @@ def prepare_predicate(next, token):
             return
         if token[0] == "]":
             break
+        if token == ('', ''):
+            # ignore whitespace
+            continue
         if token[0] and token[0][:1] in "'\"":
             token = "'", token[0][1:-1]
         signature.append(token[0] or "-")
@@ -188,16 +191,22 @@ def prepare_predicate(next, token):
                 if elem.find(tag) is not None:
                     yield elem
         return select
-    if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
-        # [tag='value']
+    if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
+        # [.='value'] or [tag='value']
         tag = predicate[0]
         value = predicate[-1]
-        def select(context, result):
-            for elem in result:
-                for e in elem.findall(tag):
-                    if "".join(e.itertext()) == value:
+        if tag:
+            def select(context, result):
+                for elem in result:
+                    for e in elem.findall(tag):
+                        if "".join(e.itertext()) == value:
+                            yield elem
+                            break
+        else:
+            def select(context, result):
+                for elem in result:
+                    if "".join(elem.itertext()) == value:
                         yield elem
-                        break
         return select
     if signature == "-" or signature == "-()" or signature == "-()-":
         # [index] or [last()] or [last()-index]
diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
new file mode 100644 (file)
index 0000000..8b39ce9
--- /dev/null
@@ -0,0 +1,6 @@
+Improvements to path predicates in ElementTree:
+
+* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
+* Add support for text comparison of the current node, like "[.='text']".
+
+Patch by Stefan Behnel.