bpo-30485: support a default prefix mapping in ElementPath by passing None as prefix...

author Stefan Behnel <stefan_ml@behnel.de>

Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)

committer GitHub <noreply@github.com>

Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)
author Stefan Behnel <stefan_ml@behnel.de>
Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)
committer GitHub <noreply@github.com>
Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst

index 9bee0eadc28950d59c538f1e6ed086fba510fbb9..c83e719e959a29ead23a05b288b1460e87d708ba 100644 (file)
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -764,7 +764,8 @@ Element Objects
        Finds the first subelement matching *match*.  *match* may be a tag name
        or a :ref:`path <elementtree-xpath>`.  Returns an element instance
        or ``None``.  *namespaces* is an optional mapping from namespace prefix
-      to full name.
+      to full name.  Pass ``None`` as prefix to move all unprefixed tag names
+      in the expression into the given namespace.
  
  
     .. method:: findall(match, namespaces=None)
@@ -772,7 +773,8 @@ Element Objects
        Finds all matching subelements, by tag name or
        :ref:`path <elementtree-xpath>`.  Returns a list containing all matching
        elements in document order.  *namespaces* is an optional mapping from
-      namespace prefix to full name.
+      namespace prefix to full name.  Pass ``None`` as prefix to move all
+      unprefixed tag names in the expression into the given namespace.
  
  
     .. method:: findtext(match, default=None, namespaces=None)
@@ -782,7 +784,8 @@ Element Objects
        of the first matching element, or *default* if no element was found.
        Note that if the matching element has no text content an empty string
        is returned. *namespaces* is an optional mapping from namespace prefix
-      to full name.
+      to full name.  Pass ``None`` as prefix to move all unprefixed tag names
+      in the expression into the given namespace.
  
  
     .. method:: getchildren()
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py

index bdcd4e0d19a7c707f54b34487675719d005e9959..2f7a3b60b22dd7c516ca792b3e07f3f674ff2959 100644 (file)
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2463,6 +2463,12 @@ class ElementFindTest(unittest.TestCase):
          nsmap = {'xx': 'Y'}
          self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
          self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
+        nsmap = {'xx': 'X', None: 'Y'}
+        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
+        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
+        nsmap = {'xx': 'X', '': 'Y'}
+        with self.assertRaisesRegex(ValueError, 'namespace prefix'):
+            root.findall(".//xx:b", namespaces=nsmap)
  
      def test_bad_find(self):
          e = ET.XML(SAMPLE_XML)
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py

index ef32917b14d41e22a490d658840750d68e9493c7..0e3854f9db22cc95a1d0f64fc30d0bbc07949da2 100644 (file)
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -71,16 +71,22 @@ xpath_tokenizer_re = re.compile(
      )
  
  def xpath_tokenizer(pattern, namespaces=None):
+    default_namespace = namespaces.get(None) if namespaces else None
      for token in xpath_tokenizer_re.findall(pattern):
          tag = token[1]
-        if tag and tag[0] != "{" and ":" in tag:
-            try:
+        if tag and tag[0] != "{":
+            if ":" in tag:
                  prefix, uri = tag.split(":", 1)
-                if not namespaces:
-                    raise KeyError
-                yield token[0], "{%s}%s" % (namespaces[prefix], uri)
-            except KeyError:
-                raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
+                try:
+                    if not namespaces:
+                        raise KeyError
+                    yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+                except KeyError:
+                    raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
+            elif default_namespace:
+                yield token[0], "{%s}%s" % (default_namespace, tag)
+            else:
+                yield token
          else:
              yield token
  
@@ -264,10 +270,19 @@ class _SelectorContext:
  
  def iterfind(elem, path, namespaces=None):
      # compile selector pattern
-    cache_key = (path, None if namespaces is None
-                            else tuple(sorted(namespaces.items())))
      if path[-1:] == "/":
          path = path + "*" # implicit all (FIXME: keep this?)
+
+    cache_key = (path,)
+    if namespaces:
+        if '' in namespaces:
+            raise ValueError("empty namespace prefix must be passed as None, not the empty string")
+        if None in namespaces:
+            cache_key += (namespaces[None],) + tuple(sorted(
+                item for item in namespaces.items() if item[0] is not None))
+        else:
+            cache_key += tuple(sorted(namespaces.items()))
+
      try:
          selector = _cache[cache_key]
      except KeyError:
diff --git a/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst

new file mode 100644 (file)

index 0000000..6c82efd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst
@@ -0,0 +1,3 @@
+Path expressions in xml.etree.ElementTree can now avoid explicit namespace
+prefixes for tags (or the "{namespace}tag" notation) by passing a default
+namespace with a 'None' prefix.
author	Stefan Behnel <stefan_ml@behnel.de>
	Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)
committer	GitHub <noreply@github.com>
	Sun, 14 Apr 2019 08:09:09 +0000 (10:09 +0200)
Doc/library/xml.etree.elementtree.rst		patch | blob | history
Lib/test/test_xml_etree.py		patch | blob | history
Lib/xml/etree/ElementPath.py		patch | blob | history
Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst	[new file with mode: 0644]	patch | blob