granicus.if.org Git - python/commitdiff
Issue #6233: ElementTree failed converting unicode characters to XML
author: Antoine Pitrou <solipsis@pitrou.net>
Tue, 9 Feb 2010 16:51:16 +0000 (16:51 +0000)
committer: Antoine Pitrou <solipsis@pitrou.net>
Tue, 9 Feb 2010 16:51:16 +0000 (16:51 +0000)
entities when they couldn't be represented in the requested output
encoding.  Patch by Jerry Chen.

Lib/test/test_xml_etree.py
Lib/xml/etree/ElementTree.py
Misc/ACKS
Misc/NEWS

index 895902f02f36a06dfaaf533da96b008f1924ef35..a7ad48b2e8e3219d4390e34a592ded62e79c03aa 100644 (file)
@@ -210,6 +210,17 @@ def check_encoding(ET, encoding):
     """
     ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
 
+def check_issue6233():
+    """
+    >>> from xml.etree import ElementTree as ET
+
+    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\xe3g</body>")
+    >>> ET.tostring(e, 'ascii')
+    b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\xe3g</body>".encode('iso-8859-1')) # create byte string with the right encoding
+    >>> ET.tostring(e, 'ascii')
+    b"<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
+    """
 
 #
 # xinclude tests (samples from appendix C of the xinclude specification)
index cfac4f7090fae6a8ede7c97b32c8aae918680dd9..c47573e313637a38168a5c993963e2782db4b4be 100644 (file)
@@ -662,9 +662,9 @@ class ElementTree:
         # write XML to file
         tag = node.tag
         if tag is Comment:
-            file.write(_encode("<!-- %s -->" % _escape_cdata(node.text), encoding))
+            file.write(b"<!-- " + _encode_cdata(node.text, encoding) + b" -->")
         elif tag is ProcessingInstruction:
-            file.write(_encode("<?%s?>" % _escape_cdata(node.text), encoding))
+            file.write(b"<?" + _encode_cdata(node.text, encoding) + b"?>")
         else:
             items = list(node.items())
             xmlns_items = [] # new namespaces in this scope
@@ -696,7 +696,7 @@ class ElementTree:
             if node.text or len(node):
                 file.write(_encode(">", encoding))
                 if node.text:
-                    file.write(_encode(_escape_cdata(node.text), encoding))
+                    file.write(_encode_cdata(node.text, encoding))
                 for n in node:
                     self._write(file, n, encoding, namespaces)
                 file.write(_encode("</" + tag + ">", encoding))
@@ -705,7 +705,7 @@ class ElementTree:
             for k, v in xmlns_items:
                 del namespaces[v]
         if node.tail:
-            file.write(_encode(_escape_cdata(node.tail), encoding))
+            file.write(_encode_cdata(node.tail, encoding))
 
 # --------------------------------------------------------------------
 # helpers
@@ -788,13 +788,16 @@ def _encode_entity(text, pattern=_escape):
 # the following functions assume an ascii-compatible encoding
 # (or "utf-16")
 
-def _escape_cdata(text):
+def _encode_cdata(text, encoding):
     # escape character data
     try:
         text = text.replace("&", "&amp;")
         text = text.replace("<", "&lt;")
         text = text.replace(">", "&gt;")
-        return text
+        if encoding:
+            return text.encode(encoding, "xmlcharrefreplace")
+        else:
+            return text
     except (TypeError, AttributeError):
         _raise_serialization_error(text)
 
index f349b1e3d1a7c85ff3673d76771b6dfb9d2b724e..b5dccde4fd379792f9765235d83c460e0eaee9e0 100644 (file)
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -131,6 +131,7 @@ Greg Chapman
 Brad Chapman
 David Chaum
 Nicolas Chauvat
+Jerry Chen
 Michael Chermside
 Albert Chin-A-Young
 Adal Chiriliuc
index 124ea0ff274c68029e7c5d0f00e9d1b3b34fab0b..9d2b7cf083611308f28185c3e910a77b9dac221f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -242,6 +242,10 @@ C-API
 Library
 -------
 
+- Issue #6233: ElementTree failed converting unicode characters to XML
+  entities when they couldn't be represented in the requested output
+  encoding.  Patch by Jerry Chen.
+
 - Issue #6003: add an argument to ``zipfile.Zipfile.writestr`` to
   specify the compression type.