From ffca16e25a70fd44a87b13b379b5ec0c7a11e926 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bernt=20R=C3=B8skar=20Brenna?= Date: Sun, 14 Apr 2019 10:07:02 +0200 Subject: [PATCH] bpo-36227: ElementTree.tostring() default_namespace and xml_declaration arguments (GH-12225) Add new keyword arguments "default_namespace" and "xml_declaration" to functions ET.tostring() and ET.tostringlist(), as known from ElementTree.write(). --- Doc/library/xml.etree.elementtree.rst | 21 ++- Lib/test/test_xml_etree.py | 123 ++++++++++++++++++ Lib/xml/etree/ElementTree.py | 15 ++- .../2019-03-07-20-02-18.bpo-36227.i2Z1XR.rst | 2 + 4 files changed, 152 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-03-07-20-02-18.bpo-36227.i2Z1XR.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 4a7cf6f095..9bee0eadc2 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -594,6 +594,7 @@ Functions .. function:: tostring(element, encoding="us-ascii", method="xml", *, \ + xml_declaration=None, default_namespace=None, short_empty_elements=True) Generates a string representation of an XML element, including all @@ -601,14 +602,19 @@ Functions the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to generate a Unicode string (otherwise, a bytestring is generated). *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``). - *short_empty_elements* has the same meaning as in :meth:`ElementTree.write`. - Returns an (optionally) encoded string containing the XML data. + *xml_declaration*, *default_namespace* and *short_empty_elements* have the same + meaning as in :meth:`ElementTree.write`. Returns an (optionally) encoded string + containing the XML data. .. versionadded:: 3.4 The *short_empty_elements* parameter. + .. versionadded:: 3.8 + The *xml_declaration* and *default_namespace* parameters. + .. 
function:: tostringlist(element, encoding="us-ascii", method="xml", *, \ + xml_declaration=None, default_namespace=None, short_empty_elements=True) Generates a string representation of an XML element, including all @@ -616,16 +622,19 @@ Functions the output encoding (default is US-ASCII). Use ``encoding="unicode"`` to generate a Unicode string (otherwise, a bytestring is generated). *method* is either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``). - *short_empty_elements* has the same meaning as in :meth:`ElementTree.write`. - Returns a list of (optionally) encoded strings containing the XML data. - It does not guarantee any specific sequence, except that - ``b"".join(tostringlist(element)) == tostring(element)``. + *xml_declaration*, *default_namespace* and *short_empty_elements* have the same + meaning as in :meth:`ElementTree.write`. Returns a list of (optionally) encoded + strings containing the XML data. It does not guarantee any specific sequence, + except that ``b"".join(tostringlist(element)) == tostring(element)``. .. versionadded:: 3.2 .. versionadded:: 3.4 The *short_empty_elements* parameter. + .. versionadded:: 3.8 + The *xml_declaration* and *default_namespace* parameters. + .. 
function:: XML(text, parser=None) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 8a7ec0076f..bdcd4e0d19 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -9,6 +9,7 @@ import copy import functools import html import io +import locale import operator import pickle import sys @@ -756,6 +757,128 @@ class ElementTreeTest(unittest.TestCase): elem = ET.fromstring("text") self.assertEqual(ET.tostring(elem), b'text') + def test_tostring_default_namespace(self): + elem = ET.XML('') + self.assertEqual( + ET.tostring(elem, encoding='unicode'), + '' + ) + self.assertEqual( + ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'), + '' + ) + + def test_tostring_default_namespace_different_namespace(self): + elem = ET.XML('') + self.assertEqual( + ET.tostring(elem, encoding='unicode', default_namespace='foobar'), + '' + ) + + def test_tostring_default_namespace_original_no_namespace(self): + elem = ET.XML('') + EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$' + with self.assertRaisesRegex(ValueError, EXPECTED_MSG): + ET.tostring(elem, encoding='unicode', default_namespace='foobar') + + def test_tostring_no_xml_declaration(self): + elem = ET.XML('') + self.assertEqual( + ET.tostring(elem, encoding='unicode'), + '' + ) + + def test_tostring_xml_declaration(self): + elem = ET.XML('') + self.assertEqual( + ET.tostring(elem, encoding='utf8', xml_declaration=True), + b"\n" + ) + + def test_tostring_xml_declaration_unicode_encoding(self): + elem = ET.XML('') + preferredencoding = locale.getpreferredencoding() + self.assertEqual( + f"\n", + ET.tostring(elem, encoding='unicode', xml_declaration=True) + ) + + def test_tostring_xml_declaration_cases(self): + elem = ET.XML('ø') + preferredencoding = locale.getpreferredencoding() + TESTCASES = [ + # (expected_retval, encoding, xml_declaration) + # ... 
xml_declaration = None + (b'ø', None, None), + (b'\xc3\xb8', 'UTF-8', None), + (b'ø', 'US-ASCII', None), + (b"\n" + b"\xf8", 'ISO-8859-1', None), + ('ø', 'unicode', None), + + # ... xml_declaration = False + (b"ø", None, False), + (b"\xc3\xb8", 'UTF-8', False), + (b"ø", 'US-ASCII', False), + (b"\xf8", 'ISO-8859-1', False), + ("ø", 'unicode', False), + + # ... xml_declaration = True + (b"\n" + b"ø", None, True), + (b"\n" + b"\xc3\xb8", 'UTF-8', True), + (b"\n" + b"ø", 'US-ASCII', True), + (b"\n" + b"\xf8", 'ISO-8859-1', True), + (f"\n" + "ø", 'unicode', True), + + ] + for expected_retval, encoding, xml_declaration in TESTCASES: + with self.subTest(f'encoding={encoding} ' + f'xml_declaration={xml_declaration}'): + self.assertEqual( + ET.tostring( + elem, + encoding=encoding, + xml_declaration=xml_declaration + ), + expected_retval + ) + + def test_tostringlist_default_namespace(self): + elem = ET.XML('') + self.assertEqual( + ''.join(ET.tostringlist(elem, encoding='unicode')), + '' + ) + self.assertEqual( + ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')), + '' + ) + + def test_tostringlist_xml_declaration(self): + elem = ET.XML('') + self.assertEqual( + ''.join(ET.tostringlist(elem, encoding='unicode')), + '' + ) + self.assertEqual( + b''.join(ET.tostringlist(elem, xml_declaration=True)), + b"\n" + ) + + preferredencoding = locale.getpreferredencoding() + stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) + self.assertEqual( + ''.join(stringlist), + f"\n" + ) + self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>") + self.assertEqual(['', '', ''], stringlist[1:]) + def test_encoding(self): def check(encoding, body=''): xml = ("%s" % diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index b5ad8e1d14..c9e2f36835 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1113,6 +1113,7 @@ def _escape_attrib_html(text): # 
-------------------------------------------------------------------- def tostring(element, encoding=None, method=None, *, + xml_declaration=None, default_namespace=None, short_empty_elements=True): """Generate string representation of XML element. @@ -1121,13 +1122,17 @@ def tostring(element, encoding=None, method=None, *, *element* is an Element instance, *encoding* is an optional output encoding defaulting to US-ASCII, *method* is an optional output which can - be one of "xml" (default), "html", "text" or "c14n". + be one of "xml" (default), "html", "text" or "c14n", *default_namespace* + sets the default XML namespace (for "xmlns"). Returns an (optionally) encoded string containing the XML data. """ stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() - ElementTree(element).write(stream, encoding, method=method, + ElementTree(element).write(stream, encoding, + xml_declaration=xml_declaration, + default_namespace=default_namespace, + method=method, short_empty_elements=short_empty_elements) return stream.getvalue() @@ -1149,10 +1154,14 @@ class _ListDataStream(io.BufferedIOBase): return len(self.lst) def tostringlist(element, encoding=None, method=None, *, + xml_declaration=None, default_namespace=None, short_empty_elements=True): lst = [] stream = _ListDataStream(lst) - ElementTree(element).write(stream, encoding, method=method, + ElementTree(element).write(stream, encoding, + xml_declaration=xml_declaration, + default_namespace=default_namespace, + method=method, short_empty_elements=short_empty_elements) return lst diff --git a/Misc/NEWS.d/next/Library/2019-03-07-20-02-18.bpo-36227.i2Z1XR.rst b/Misc/NEWS.d/next/Library/2019-03-07-20-02-18.bpo-36227.i2Z1XR.rst new file mode 100644 index 0000000000..3b5b6cda09 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-03-07-20-02-18.bpo-36227.i2Z1XR.rst @@ -0,0 +1,2 @@ +Added support for keyword arguments ``default_namespace`` and ``xml_declaration`` in functions +ElementTree.tostring() and 
ElementTree.tostringlist(). -- 2.40.0