]> granicus.if.org Git - python/commitdiff
bpo-13611: C14N 2.0 implementation for ElementTree (GH-12966)
authorStefan Behnel <stefan_ml@behnel.de>
Wed, 1 May 2019 20:34:13 +0000 (22:34 +0200)
committerGitHub <noreply@github.com>
Wed, 1 May 2019 20:34:13 +0000 (22:34 +0200)
* Implement C14N 2.0 as a new canonicalize() function in ElementTree.

Missing features:
- prefix renaming in XPath expressions (tag and attribute text is supported)
- preservation of original prefixes given redundant namespace declarations

60 files changed:
Doc/library/xml.etree.elementtree.rst
Doc/whatsnew/3.8.rst
Lib/test/test_xml_etree.py
Lib/test/xmltestdata/c14n-20/c14nComment.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nQname.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/c14nTrim.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/doc.dtd [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/doc.xsl [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N1.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N2.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N3.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N4.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N5.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inC14N6.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsContent.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsPushdown.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsRedecl.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsSort.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/inNsXml.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefixQname.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nQname.xml [new file with mode: 0644]
Lib/test/xmltestdata/c14n-20/world.txt [new file with mode: 0644]
Lib/xml/etree/ElementTree.py
Misc/NEWS.d/next/Library/2019-04-26-10-10-34.bpo-13611.XEF4bg.rst [new file with mode: 0644]

index 66090af00fa16edbb2617969387589b3ffe69cc9..ef74d0c852cd753dd50ffe71a19eb22485470ec4 100644 (file)
@@ -465,6 +465,53 @@ Reference
 Functions
 ^^^^^^^^^
 
+.. function:: canonicalize(xml_data=None, *, out=None, from_file=None, **options)
+
+   `C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ transformation function.
+
+   Canonicalization is a way to normalise XML output in a way that allows
+   byte-by-byte comparisons and digital signatures.  It reduced the freedom
+   that XML serializers have and instead generates a more constrained XML
+   representation.  The main restrictions regard the placement of namespace
+   declarations, the ordering of attributes, and ignorable whitespace.
+
+   This function takes an XML data string (*xml_data*) or a file path or
+   file-like object (*from_file*) as input, converts it to the canonical
+   form, and writes it out using the *out* file(-like) object, if provided,
+   or returns it as a text string if not.  The output file receives text,
+   not bytes.  It should therefore be opened in text mode with ``utf-8``
+   encoding.
+
+   Typical uses::
+
+      xml_data = "<root>...</root>"
+      print(canonicalize(xml_data))
+
+      with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file:
+          canonicalize(xml_data, out=out_file)
+
+      with open("c14n_output.xml", mode='w', encoding='utf-8') as out_file:
+          canonicalize(from_file="inputfile.xml", out=out_file)
+
+   The configuration *options* are as follows:
+
+   - *with_comments*: set to true to include comments (default: false)
+   - *strip_text*: set to true to strip whitespace before and after text content
+                   (default: false)
+   - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
+                         (default: false)
+   - *qname_aware_tags*: a set of qname aware tag names in which prefixes
+                         should be replaced in text content (default: empty)
+   - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
+                          should be replaced in text content (default: empty)
+   - *exclude_attrs*: a set of attribute names that should not be serialised
+   - *exclude_tags*: a set of tag names that should not be serialised
+
+   In the option list above, "a set" refers to any collection or iterable of
+   strings, no ordering is expected.
+
+   .. versionadded:: 3.8
+
 
 .. function:: Comment(text=None)
 
@@ -1114,6 +1161,19 @@ TreeBuilder Objects
       .. versionadded:: 3.8
 
 
+.. class:: C14NWriterTarget(write, *, \
+             with_comments=False, strip_text=False, rewrite_prefixes=False, \
+             qname_aware_tags=None, qname_aware_attrs=None, \
+             exclude_attrs=None, exclude_tags=None)
+
+   A `C14N 2.0 <https://www.w3.org/TR/xml-c14n2/>`_ writer.  Arguments are the
+   same as for the :func:`canonicalize` function.  This class does not build a
+   tree but translates the callback events directly into a serialised form
+   using the *write* function.
+
+   .. versionadded:: 3.8
+
+
 .. _elementtree-xmlparser-objects:
 
 XMLParser Objects
index bbc55ddd63418d3c98ad6146ad9e37890492873f..37570bcad526084a1e51327ea6138937628bc816 100644 (file)
@@ -525,6 +525,10 @@ xml
   external entities by default.
   (Contributed by Christian Heimes in :issue:`17239`.)
 
+* The :mod:`xml.etree.ElementTree` module provides a new function
+  :func:`–xml.etree.ElementTree.canonicalize()` that implements C14N 2.0.
+  (Contributed by Stefan Behnel in :issue:`13611`.)
+
 
 Optimizations
 =============
index 0abc42a173d93005e7b05c7b9d8ab392651f85bc..a59a11f025da314bc41a45a541564075e9db39e4 100644 (file)
@@ -12,6 +12,7 @@ import io
 import itertools
 import locale
 import operator
+import os
 import pickle
 import sys
 import textwrap
@@ -20,6 +21,7 @@ import unittest
 import warnings
 import weakref
 
+from functools import partial
 from itertools import product, islice
 from test import support
 from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
@@ -3527,6 +3529,231 @@ class NoAcceleratorTest(unittest.TestCase):
         self.assertIsInstance(pyET.Element.__init__, types.FunctionType)
         self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType)
 
+
+# --------------------------------------------------------------------
+
+def c14n_roundtrip(xml, **options):
+    return pyET.canonicalize(xml, **options)
+
+
+class C14NTest(unittest.TestCase):
+    maxDiff = None
+
+    #
+    # simple roundtrip tests (from c14n.py)
+
+    def test_simple_roundtrip(self):
+        # Basics
+        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
+        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
+                '<doc xmlns="uri"></doc>')
+        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
+            '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
+        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
+            '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
+        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
+            '<elem></elem>')
+
+        # C14N spec
+        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
+            '<doc>Hello, world!</doc>')
+        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
+            '<value>2</value>')
+        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
+            '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
+        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
+            '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
+        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
+            '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
+        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
+            '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
+        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
+            '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')
+
+        # fragments from PJ's tests
+        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
+        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')
+
+    def test_c14n_exclusion(self):
+        xml = textwrap.dedent("""\
+        <root xmlns:x="http://example.com/x">
+            <a x:attr="attrx">
+                <b>abtext</b>
+            </a>
+            <b>btext</b>
+            <c>
+                <x:d>dtext</x:d>
+            </c>
+        </root>
+        """)
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True),
+            '<root>'
+            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
+            '<b>btext</b>'
+            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
+            '<root>'
+            '<a><b>abtext</b></a>'
+            '<b>btext</b>'
+            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
+            '<root>'
+            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
+            '<b>btext</b>'
+            '<c></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
+                           exclude_tags=['{http://example.com/x}d']),
+            '<root>'
+            '<a><b>abtext</b></a>'
+            '<b>btext</b>'
+            '<c></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
+            '<root>'
+            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
+            '<root>\n'
+            '    \n'
+            '    \n'
+            '    <c>\n'
+            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
+            '    </c>\n'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
+            '<root>'
+            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
+            '<c></c>'
+            '</root>')
+        self.assertEqual(
+            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
+            '<root>\n'
+            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
+            '        \n'
+            '    </a>\n'
+            '    \n'
+            '    <c>\n'
+            '        \n'
+            '    </c>\n'
+            '</root>')
+
+    #
+    # basic method=c14n tests from the c14n 2.0 specification.  uses
+    # test files under xmltestdata/c14n-20.
+
+    # note that this uses generated C14N versions of the standard ET.write
+    # output, not roundtripped C14N (see above).
+
+    def test_xml_c14n2(self):
+        datadir = findfile("c14n-20", subdir="xmltestdata")
+        full_path = partial(os.path.join, datadir)
+
+        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
+                 if filename.endswith('.xml')]
+        input_files = [
+            filename for filename in files
+            if filename.startswith('in')
+        ]
+        configs = {
+            filename: {
+                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
+                for option in ET.parse(full_path(filename) + ".xml").getroot()
+            }
+            for filename in files
+            if filename.startswith('c14n')
+        }
+
+        tests = {
+            input_file: [
+                (filename, configs[filename.rsplit('_', 1)[-1]])
+                for filename in files
+                if filename.startswith(f'out_{input_file}_')
+                and filename.rsplit('_', 1)[-1] in configs
+            ]
+            for input_file in input_files
+        }
+
+        # Make sure we found all test cases.
+        self.assertEqual(30, len([
+            output_file for output_files in tests.values()
+            for output_file in output_files]))
+
+        def get_option(config, option_name, default=None):
+            return config.get(option_name, (default, ()))[0]
+
+        for input_file, output_files in tests.items():
+            for output_file, config in output_files:
+                keep_comments = get_option(
+                    config, 'IgnoreComments') == 'true'  # no, it's right :)
+                strip_text = get_option(
+                    config, 'TrimTextNodes') == 'true'
+                rewrite_prefixes = get_option(
+                    config, 'PrefixRewrite') == 'sequential'
+                if 'QNameAware' in config:
+                    qattrs = [
+                        f"{{{el.get('NS')}}}{el.get('Name')}"
+                        for el in config['QNameAware'][1].findall(
+                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
+                    ]
+                    qtags = [
+                        f"{{{el.get('NS')}}}{el.get('Name')}"
+                        for el in config['QNameAware'][1].findall(
+                            '{http://www.w3.org/2010/xml-c14n2}Element')
+                    ]
+                else:
+                    qtags = qattrs = None
+
+                # Build subtest description from config.
+                config_descr = ','.join(
+                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
+                    for name, (value, children) in sorted(config.items())
+                )
+
+                with self.subTest(f"{output_file}({config_descr})"):
+                    if input_file == 'inNsRedecl' and not rewrite_prefixes:
+                        self.skipTest(
+                            f"Redeclared namespace handling is not supported in {output_file}")
+                    if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
+                        self.skipTest(
+                            f"Redeclared namespace handling is not supported in {output_file}")
+                    if 'QNameAware' in config and config['QNameAware'][1].find(
+                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
+                        self.skipTest(
+                            f"QName rewriting in XPath text is not supported in {output_file}")
+
+                    f = full_path(input_file + ".xml")
+                    if input_file == 'inC14N5':
+                        # Hack: avoid setting up external entity resolution in the parser.
+                        with open(full_path('world.txt'), 'rb') as entity_file:
+                            with open(f, 'rb') as f:
+                                f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read()))
+
+                    text = ET.canonicalize(
+                        from_file=f,
+                        with_comments=keep_comments,
+                        strip_text=strip_text,
+                        rewrite_prefixes=rewrite_prefixes,
+                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)
+
+                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
+                        expected = f.read()
+                        if input_file == 'inC14N3':
+                            # FIXME: cET resolves default attributes but ET does not!
+                            expected = expected.replace(' attr="default"', '')
+                            text = text.replace(' attr="default"', '')
+                    self.assertEqual(expected, text)
+
 # --------------------------------------------------------------------
 
 
@@ -3559,6 +3786,8 @@ def test_main(module=None):
         XMLParserTest,
         XMLPullParserTest,
         BugsTest,
+        KeywordArgsTest,
+        C14NTest,
         ]
 
     # These tests will only run for the pure-Python version that doesn't import
diff --git a/Lib/test/xmltestdata/c14n-20/c14nComment.xml b/Lib/test/xmltestdata/c14n-20/c14nComment.xml
new file mode 100644 (file)
index 0000000..e95aa30
--- /dev/null
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:IgnoreComments>true</c14n2:IgnoreComments>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/c14nDefault.xml
new file mode 100644 (file)
index 0000000..c136414
--- /dev/null
@@ -0,0 +1,3 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" Algorithm="http://www.w3.org/2010/xml-c14n2">
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefix.xml
new file mode 100644 (file)
index 0000000..fb233b4
--- /dev/null
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefixQname.xml
new file mode 100644 (file)
index 0000000..23188ee
--- /dev/null
@@ -0,0 +1,7 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+  <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+  <c14n2:QNameAware>
+   <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
+  </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/c14nPrefixQnameXpathElem.xml
new file mode 100644 (file)
index 0000000..626fc48
--- /dev/null
@@ -0,0 +1,8 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+  <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
+  <c14n2:QNameAware>
+   <c14n2:Element Name="bar" NS="http://a"/>
+   <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
+  </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nQname.xml b/Lib/test/xmltestdata/c14n-20/c14nQname.xml
new file mode 100644 (file)
index 0000000..919e590
--- /dev/null
@@ -0,0 +1,6 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+  <c14n2:QNameAware>
+   <c14n2:QualifiedAttr Name="type" NS="http://www.w3.org/2001/XMLSchema-instance"/>
+  </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml b/Lib/test/xmltestdata/c14n-20/c14nQnameElem.xml
new file mode 100644 (file)
index 0000000..0321f80
--- /dev/null
@@ -0,0 +1,6 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+  <c14n2:QNameAware>
+   <c14n2:Element Name="bar" NS="http://a"/>
+  </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/c14nQnameXpathElem.xml
new file mode 100644 (file)
index 0000000..c4890bc
--- /dev/null
@@ -0,0 +1,7 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+  <c14n2:QNameAware>
+   <c14n2:Element Name="bar" NS="http://a"/>
+   <c14n2:XPathElement Name="IncludedXPath" NS="http://www.w3.org/2010/xmldsig2#"/>
+  </c14n2:QNameAware>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/c14nTrim.xml
new file mode 100644 (file)
index 0000000..ccb9cf6
--- /dev/null
@@ -0,0 +1,4 @@
+<dsig:CanonicalizationMethod xmlns:dsig="http://www.w3.org/2000/09/xmldsig#" xmlns:c14n2="http://www.w3.org/2010/xml-c14n2" Algorithm="http://www.w3.org/2010/xml-c14n2">
+ <c14n2:TrimTextNodes>true</c14n2:TrimTextNodes>
+</dsig:CanonicalizationMethod>
+
diff --git a/Lib/test/xmltestdata/c14n-20/doc.dtd b/Lib/test/xmltestdata/c14n-20/doc.dtd
new file mode 100644 (file)
index 0000000..5c5d544
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!ELEMENT doc (#PCDATA)>
+
+
+
diff --git a/Lib/test/xmltestdata/c14n-20/doc.xsl b/Lib/test/xmltestdata/c14n-20/doc.xsl
new file mode 100644 (file)
index 0000000..a3f2348
--- /dev/null
@@ -0,0 +1,5 @@
+<?xml version="1.0"?>
+<xsl:stylesheet version="1.0"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+                >
+</xsl:stylesheet>
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N1.xml b/Lib/test/xmltestdata/c14n-20/inC14N1.xml
new file mode 100644 (file)
index 0000000..ed450c7
--- /dev/null
@@ -0,0 +1,14 @@
+<?xml version="1.0"?>
+
+<?xml-stylesheet   href="doc.xsl"
+   type="text/xsl"   ?>
+
+<!DOCTYPE doc SYSTEM "doc.dtd">
+
+<doc>Hello, world!<!-- Comment 1 --></doc>
+
+<?pi-without-data     ?>
+
+<!-- Comment 2 -->
+
+<!-- Comment 3 -->
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N2.xml b/Lib/test/xmltestdata/c14n-20/inC14N2.xml
new file mode 100644 (file)
index 0000000..74eeea1
--- /dev/null
@@ -0,0 +1,11 @@
+<doc>
+   <clean>   </clean>
+   <dirty>   A   B   </dirty>
+   <mixed>
+      A
+      <clean>   </clean>
+      B
+      <dirty>   A   B   </dirty>
+      C
+   </mixed>
+</doc>
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N3.xml b/Lib/test/xmltestdata/c14n-20/inC14N3.xml
new file mode 100644 (file)
index 0000000..fea7821
--- /dev/null
@@ -0,0 +1,18 @@
+<!DOCTYPE doc [<!ATTLIST e9 attr CDATA "default">]>
+<doc>
+   <e1   />
+   <e2   ></e2>
+   <e3   name = "elem3"   id="elem3"   />
+   <e4   name="elem4"   id="elem4"   ></e4>
+   <e5 a:attr="out" b:attr="sorted" attr2="all" attr="I'm"
+      xmlns:b="http://www.ietf.org"
+      xmlns:a="http://www.w3.org"
+      xmlns="http://example.org"/>
+   <e6 xmlns="" xmlns:a="http://www.w3.org">
+      <e7 xmlns="http://www.ietf.org">
+         <e8 xmlns="" xmlns:a="http://www.w3.org">
+            <e9 xmlns="" xmlns:a="http://www.ietf.org"/>
+         </e8>
+      </e7>
+   </e6>
+</doc> 
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N4.xml b/Lib/test/xmltestdata/c14n-20/inC14N4.xml
new file mode 100644 (file)
index 0000000..909a847
--- /dev/null
@@ -0,0 +1,13 @@
+<!DOCTYPE doc [
+<!ATTLIST normId id ID #IMPLIED>
+<!ATTLIST normNames attr NMTOKENS #IMPLIED>
+]>
+<doc>
+   <text>First line&#x0d;&#10;Second line</text>
+   <value>&#x32;</value>
+   <compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>
+   <compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>
+   <norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>
+   <normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>
+   <normId id=' &apos;&#x20;&#13;&#xa;&#9; &apos; '/>
+</doc>
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N5.xml b/Lib/test/xmltestdata/c14n-20/inC14N5.xml
new file mode 100644 (file)
index 0000000..501161b
--- /dev/null
@@ -0,0 +1,12 @@
+<!DOCTYPE doc [
+<!ATTLIST doc attrExtEnt CDATA #IMPLIED>
+<!ENTITY ent1 "Hello">
+<!ENTITY ent2 SYSTEM "world.txt">
+<!ENTITY entExt SYSTEM "earth.gif" NDATA gif>
+<!NOTATION gif SYSTEM "viewgif.exe">
+]>
+<doc attrExtEnt="entExt">
+   &ent1;, &ent2;!
+</doc>
+
+<!-- Let world.txt contain "world" (excluding the quotes) -->
diff --git a/Lib/test/xmltestdata/c14n-20/inC14N6.xml b/Lib/test/xmltestdata/c14n-20/inC14N6.xml
new file mode 100644 (file)
index 0000000..31e2071
--- /dev/null
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<doc>&#169;</doc>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsContent.xml b/Lib/test/xmltestdata/c14n-20/inNsContent.xml
new file mode 100644 (file)
index 0000000..b992466
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:child="http://c" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+ <a:bar>xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsDefault.xml b/Lib/test/xmltestdata/c14n-20/inNsDefault.xml
new file mode 100644 (file)
index 0000000..3e0d323
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://a" xmlns:b="http://b">
+ <b:bar b:att1="val" att2="val"/>
+</foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsPushdown.xml b/Lib/test/xmltestdata/c14n-20/inNsPushdown.xml
new file mode 100644 (file)
index 0000000..daa67d8
--- /dev/null
@@ -0,0 +1,6 @@
+<a:foo xmlns:a="http://a" xmlns:b="http://b" xmlns:c="http://c">
+ <b:bar/>
+ <b:bar/>
+ <b:bar/>
+ <a:bar b:att1="val"/>
+</a:foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsRedecl.xml b/Lib/test/xmltestdata/c14n-20/inNsRedecl.xml
new file mode 100644 (file)
index 0000000..10bd97b
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://z3" xmlns:b="http://z2" a:att1="val1" b:att2="val2"> 
+ <bar xmlns="http://z0" xmlns:a="http://z2" a:att1="val1" b:att2="val2" xmlns:b="http://z3" />
+</foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsSort.xml b/Lib/test/xmltestdata/c14n-20/inNsSort.xml
new file mode 100644 (file)
index 0000000..8e9fc01
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://z3" xmlns:b="http://z2" b:att1="val1" c:att3="val3" b:att2="val2" xmlns:c="http://z1" xmlns:d="http://z0">
+ <c:bar/>
+ <c:bar d:att3="val3"/>
+</a:foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml b/Lib/test/xmltestdata/c14n-20/inNsSuperfluous.xml
new file mode 100644 (file)
index 0000000..f77720f
--- /dev/null
@@ -0,0 +1,4 @@
+<foo xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2" xmlns="http://z0"> 
+ <c:bar xmlns:a="http://z0" xmlns:c="http://z0" c:att3="val3"/>
+ <d:bar xmlns:d="http://z0"/>
+</foo>
diff --git a/Lib/test/xmltestdata/c14n-20/inNsXml.xml b/Lib/test/xmltestdata/c14n-20/inNsXml.xml
new file mode 100644 (file)
index 0000000..7520cf3
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+  <bar xsi:type="xsd:string" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">data</bar>
+</foo>
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nComment.xml
new file mode 100644 (file)
index 0000000..d98d168
--- /dev/null
@@ -0,0 +1,6 @@
+<?xml-stylesheet href="doc.xsl"
+   type="text/xsl"   ?>
+<doc>Hello, world!<!-- Comment 1 --></doc>
+<?pi-without-data?>
+<!-- Comment 2 -->
+<!-- Comment 3 -->
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N1_c14nDefault.xml
new file mode 100644 (file)
index 0000000..af9a977
--- /dev/null
@@ -0,0 +1,4 @@
+<?xml-stylesheet href="doc.xsl"
+   type="text/xsl"   ?>
+<doc>Hello, world!</doc>
+<?pi-without-data?>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nDefault.xml
new file mode 100644 (file)
index 0000000..2afa15c
--- /dev/null
@@ -0,0 +1,11 @@
+<doc>
+   <clean>   </clean>
+   <dirty>   A   B   </dirty>
+   <mixed>
+      A
+      <clean>   </clean>
+      B
+      <dirty>   A   B   </dirty>
+      C
+   </mixed>
+</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N2_c14nTrim.xml
new file mode 100644 (file)
index 0000000..7a1dc32
--- /dev/null
@@ -0,0 +1 @@
+<doc><clean></clean><dirty>A   B</dirty><mixed>A<clean></clean>B<dirty>A   B</dirty>C</mixed></doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nDefault.xml
new file mode 100644 (file)
index 0000000..662e108
--- /dev/null
@@ -0,0 +1,14 @@
+<doc>
+   <e1></e1>
+   <e2></e2>
+   <e3 id="elem3" name="elem3"></e3>
+   <e4 id="elem4" name="elem4"></e4>
+   <e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5>
+   <e6>
+      <e7 xmlns="http://www.ietf.org">
+         <e8 xmlns="">
+            <e9 attr="default"></e9>
+         </e8>
+      </e7>
+   </e6>
+</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..041e1ec
--- /dev/null
@@ -0,0 +1,14 @@
+<n0:doc xmlns:n0="">
+   <n0:e1></n0:e1>
+   <n0:e2></n0:e2>
+   <n0:e3 id="elem3" name="elem3"></n0:e3>
+   <n0:e4 id="elem4" name="elem4"></n0:e4>
+   <n1:e5 xmlns:n1="http://example.org" xmlns:n2="http://www.ietf.org" xmlns:n3="http://www.w3.org" attr="I'm" attr2="all" n2:attr="sorted" n3:attr="out"></n1:e5>
+   <n0:e6>
+      <n2:e7 xmlns:n2="http://www.ietf.org">
+         <n0:e8>
+            <n0:e9 attr="default"></n0:e9>
+         </n0:e8>
+      </n2:e7>
+   </n0:e6>
+</n0:doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N3_c14nTrim.xml
new file mode 100644 (file)
index 0000000..4f35ad9
--- /dev/null
@@ -0,0 +1 @@
+<doc><e1></e1><e2></e2><e3 id="elem3" name="elem3"></e3><e4 id="elem4" name="elem4"></e4><e5 xmlns="http://example.org" xmlns:a="http://www.w3.org" xmlns:b="http://www.ietf.org" attr="I'm" attr2="all" b:attr="sorted" a:attr="out"></e5><e6><e7 xmlns="http://www.ietf.org"><e8 xmlns=""><e9 attr="default"></e9></e8></e7></e6></doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nDefault.xml
new file mode 100644 (file)
index 0000000..243d0e6
--- /dev/null
@@ -0,0 +1,10 @@
+<doc>
+   <text>First line&#xD;
+Second line</text>
+   <value>2</value>
+   <compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>
+   <compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>
+   <norm attr=" '    &#xD;&#xA;&#x9;   ' "></norm>
+   <normNames attr="A &#xD;&#xA;&#x9; B"></normNames>
+   <normId id="' &#xD;&#xA;&#x9; '"></normId>
+</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N4_c14nTrim.xml
new file mode 100644 (file)
index 0000000..24d83ba
--- /dev/null
@@ -0,0 +1,2 @@
+<doc><text>First line&#xD;
+Second line</text><value>2</value><compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute><compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute><norm attr=" '    &#xD;&#xA;&#x9;   ' "></norm><normNames attr="A &#xD;&#xA;&#x9; B"></normNames><normId id="' &#xD;&#xA;&#x9; '"></normId></doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nDefault.xml
new file mode 100644 (file)
index 0000000..c232e74
--- /dev/null
@@ -0,0 +1,3 @@
+<doc attrExtEnt="entExt">
+   Hello, world!
+</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N5_c14nTrim.xml
new file mode 100644 (file)
index 0000000..3fa84b1
--- /dev/null
@@ -0,0 +1 @@
+<doc attrExtEnt="entExt">Hello, world!</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inC14N6_c14nDefault.xml
new file mode 100644 (file)
index 0000000..0be38f9
--- /dev/null
@@ -0,0 +1 @@
+<doc>©</doc>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nDefault.xml
new file mode 100644 (file)
index 0000000..62d7e00
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar>xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nPrefixQnameXpathElem.xml
new file mode 100644 (file)
index 0000000..20e1c2e
--- /dev/null
@@ -0,0 +1,4 @@
+<n0:foo xmlns:n0="http://a">
+ <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema">n1:string</n0:bar>
+ <n4:IncludedXPath xmlns:n2="http://b" xmlns:n3="http://schemas.xmlsoap.org/wsdl/soap/" xmlns:n4="http://www.w3.org/2010/xmldsig2#">/n3:body/child::n2:foo[@att1 != "c:val" and @att2 != 'xsd:string']</n4:IncludedXPath>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameElem.xml
new file mode 100644 (file)
index 0000000..db8680d
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:dsig2="http://www.w3.org/2010/xmldsig2#">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml b/Lib/test/xmltestdata/c14n-20/out_inNsContent_c14nQnameXpathElem.xml
new file mode 100644 (file)
index 0000000..df3b215
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://a">
+ <a:bar xmlns:xsd="http://www.w3.org/2001/XMLSchema">xsd:string</a:bar>
+ <dsig2:IncludedXPath xmlns:b="http://b" xmlns:dsig2="http://www.w3.org/2010/xmldsig2#" xmlns:soap-env="http://schemas.xmlsoap.org/wsdl/soap/">/soap-env:body/child::b:foo[@att1 != "c:val" and @att2 != 'xsd:string']</dsig2:IncludedXPath>
+</a:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nDefault.xml
new file mode 100644 (file)
index 0000000..674b076
--- /dev/null
@@ -0,0 +1,3 @@
+<foo>
+ <b:bar xmlns:b="http://b" att2="val" b:att1="val"></b:bar>
+</foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsDefault_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..83edaae
--- /dev/null
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="">
+ <n1:bar xmlns:n1="http://b" att2="val" n1:att1="val"></n1:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nDefault.xml
new file mode 100644 (file)
index 0000000..fa4f21b
--- /dev/null
@@ -0,0 +1,6 @@
+<a:foo xmlns:a="http://a">
+ <b:bar xmlns:b="http://b"></b:bar>
+ <b:bar xmlns:b="http://b"></b:bar>
+ <b:bar xmlns:b="http://b"></b:bar>
+ <a:bar xmlns:b="http://b" b:att1="val"></a:bar>
+</a:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsPushdown_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..6d57920
--- /dev/null
@@ -0,0 +1,6 @@
+<n0:foo xmlns:n0="http://a">
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n1:bar xmlns:n1="http://b"></n1:bar>
+ <n0:bar xmlns:n1="http://b" n1:att1="val"></n0:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nDefault.xml
new file mode 100644 (file)
index 0000000..ba37f92
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns:a="http://z3" xmlns:b="http://z2" b:att2="val2" a:att1="val1"> 
+ <bar xmlns="http://z0" xmlns:a="http://z2" xmlns:b="http://z3" a:att1="val1" b:att2="val2"></bar>
+</foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsRedecl_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..af3bb2d
--- /dev/null
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="" xmlns:n1="http://z2" xmlns:n2="http://z3" n1:att2="val2" n2:att1="val1"> 
+ <n3:bar xmlns:n3="http://z0" n1:att1="val1" n2:att2="val2"></n3:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nDefault.xml
new file mode 100644 (file)
index 0000000..8a92c5c
--- /dev/null
@@ -0,0 +1,4 @@
+<a:foo xmlns:a="http://z3" xmlns:b="http://z2" xmlns:c="http://z1" c:att3="val3" b:att1="val1" b:att2="val2">
+ <c:bar></c:bar>
+ <c:bar xmlns:d="http://z0" d:att3="val3"></c:bar>
+</a:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSort_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..8d44c84
--- /dev/null
@@ -0,0 +1,4 @@
+<n2:foo xmlns:n0="http://z1" xmlns:n1="http://z2" xmlns:n2="http://z3" n0:att3="val3" n1:att1="val1" n1:att2="val2">
+ <n0:bar></n0:bar>
+ <n0:bar xmlns:n3="http://z0" n3:att3="val3"></n0:bar>
+</n2:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nDefault.xml
new file mode 100644 (file)
index 0000000..6bb862d
--- /dev/null
@@ -0,0 +1,4 @@
+<foo xmlns="http://z0" xmlns:a="http://z0" xmlns:b="http://z0" a:att1="val1" b:att2="val2"> 
+ <c:bar xmlns:c="http://z0" c:att3="val3"></c:bar>
+ <d:bar xmlns:d="http://z0"></d:bar>
+</foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsSuperfluous_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..700a16d
--- /dev/null
@@ -0,0 +1,4 @@
+<n0:foo xmlns:n0="http://z0" n0:att1="val1" n0:att2="val2"> 
+ <n0:bar n0:att3="val3"></n0:bar>
+ <n0:bar></n0:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nDefault.xml
new file mode 100644 (file)
index 0000000..1689f3b
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+  <bar xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
+</foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefix.xml
new file mode 100644 (file)
index 0000000..38508a4
--- /dev/null
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="http://z0" xml:id="23">
+  <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema-instance" n1:type="xsd:string">data</n0:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefixQname.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nPrefixQname.xml
new file mode 100644 (file)
index 0000000..867980f
--- /dev/null
@@ -0,0 +1,3 @@
+<n0:foo xmlns:n0="http://z0" xml:id="23">
+  <n0:bar xmlns:n1="http://www.w3.org/2001/XMLSchema" xmlns:n2="http://www.w3.org/2001/XMLSchema-instance" n2:type="n1:string">data</n0:bar>
+</n0:foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nQname.xml b/Lib/test/xmltestdata/c14n-20/out_inNsXml_c14nQname.xml
new file mode 100644 (file)
index 0000000..0300f9d
--- /dev/null
@@ -0,0 +1,3 @@
+<foo xmlns="http://z0" xml:id="23">
+  <bar xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="xsd:string">data</bar>
+</foo>
\ No newline at end of file
diff --git a/Lib/test/xmltestdata/c14n-20/world.txt b/Lib/test/xmltestdata/c14n-20/world.txt
new file mode 100644 (file)
index 0000000..04fea06
--- /dev/null
@@ -0,0 +1 @@
+world
\ No newline at end of file
index 5b26ac72fd1aaea96caa93c95bf02b49299e255d..645e999a0be6ca3c0bc4d59aa96411110a26b189 100644 (file)
@@ -87,6 +87,7 @@ __all__ = [
     "XML", "XMLID",
     "XMLParser", "XMLPullParser",
     "register_namespace",
+    "canonicalize", "C14NWriterTarget",
     ]
 
 VERSION = "1.3.0"
@@ -1711,6 +1712,336 @@ class XMLParser:
             del self.target, self._target
 
 
+# --------------------------------------------------------------------
+# C14N 2.0
+
+def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
+    """Convert XML to its C14N 2.0 serialised form.
+
+    If *out* is provided, it must be a file or file-like object that receives
+    the serialised canonical XML output (text, not bytes) through its ``.write()``
+    method.  To write to a file, open it in text mode with encoding "utf-8".
+    If *out* is not provided, this function returns the output as text string.
+
+    Either *xml_data* (an XML string) or *from_file* (a file path or
+    file-like object) must be provided as input.
+
+    The configuration options are the same as for the ``C14NWriterTarget``.
+    """
+    if xml_data is None and from_file is None:
+        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
+    sio = None
+    if out is None:
+        sio = out = io.StringIO()
+
+    parser = XMLParser(target=C14NWriterTarget(out.write, **options))
+
+    if xml_data is not None:
+        parser.feed(xml_data)
+        parser.close()
+    elif from_file is not None:
+        parse(from_file, parser=parser)
+
+    return sio.getvalue() if sio is not None else None
+
+
+_looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
+
+
+class C14NWriterTarget:
+    """
+    Canonicalization writer target for the XMLParser.
+
+    Serialises parse events to XML C14N 2.0.
+
+    The *write* function is used for writing out the resulting data stream
+    as text (not bytes).  To write to a file, open it in text mode with encoding
+    "utf-8" and pass its ``.write`` method.
+
+    Configuration options:
+
+    - *with_comments*: set to true to include comments
+    - *strip_text*: set to true to strip whitespace before and after text content
+    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
+    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
+                          should be replaced in text content
+    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
+                           should be replaced in text content
+    - *exclude_attrs*: a set of attribute names that should not be serialised
+    - *exclude_tags*: a set of tag names that should not be serialised
+    """
+    def __init__(self, write, *,
+                 with_comments=False, strip_text=False, rewrite_prefixes=False,
+                 qname_aware_tags=None, qname_aware_attrs=None,
+                 exclude_attrs=None, exclude_tags=None):
+        self._write = write
+        self._data = []
+        self._with_comments = with_comments
+        self._strip_text = strip_text
+        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
+        self._exclude_tags = set(exclude_tags) if exclude_tags else None
+
+        self._rewrite_prefixes = rewrite_prefixes
+        if qname_aware_tags:
+            self._qname_aware_tags = set(qname_aware_tags)
+        else:
+            self._qname_aware_tags = None
+        if qname_aware_attrs:
+            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
+        else:
+            self._find_qname_aware_attrs = None
+
+        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
+        self._declared_ns_stack = [[
+            ("http://www.w3.org/XML/1998/namespace", "xml"),
+        ]]
+        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
+        self._ns_stack = []
+        if not rewrite_prefixes:
+            self._ns_stack.append(list(_namespace_map.items()))
+        self._ns_stack.append([])
+        self._prefix_map = {}
+        self._preserve_space = [False]
+        self._pending_start = None
+        self._root_seen = False
+        self._root_done = False
+        self._ignored_depth = 0
+
+    def _iter_namespaces(self, ns_stack, _reversed=reversed):
+        for namespaces in _reversed(ns_stack):
+            if namespaces:  # almost no element declares new namespaces
+                yield from namespaces
+
+    def _resolve_prefix_name(self, prefixed_name):
+        prefix, name = prefixed_name.split(':', 1)
+        for uri, p in self._iter_namespaces(self._ns_stack):
+            if p == prefix:
+                return f'{{{uri}}}{name}'
+        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
+
+    def _qname(self, qname, uri=None):
+        if uri is None:
+            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
+        else:
+            tag = qname
+
+        prefixes_seen = set()
+        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
+            if u == uri and prefix not in prefixes_seen:
+                return f'{prefix}:{tag}' if prefix else tag, tag, uri
+            prefixes_seen.add(prefix)
+
+        # Not declared yet => add new declaration.
+        if self._rewrite_prefixes:
+            if uri in self._prefix_map:
+                prefix = self._prefix_map[uri]
+            else:
+                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
+            self._declared_ns_stack[-1].append((uri, prefix))
+            return f'{prefix}:{tag}', tag, uri
+
+        if not uri and '' not in prefixes_seen:
+            # No default namespace declared => no prefix needed.
+            return tag, tag, uri
+
+        for u, prefix in self._iter_namespaces(self._ns_stack):
+            if u == uri:
+                self._declared_ns_stack[-1].append((uri, prefix))
+                return f'{prefix}:{tag}' if prefix else tag, tag, uri
+
+        raise ValueError(f'Namespace "{uri}" is not declared in scope')
+
+    def data(self, data):
+        if not self._ignored_depth:
+            self._data.append(data)
+
+    def _flush(self, _join_text=''.join):
+        data = _join_text(self._data)
+        del self._data[:]
+        if self._strip_text and not self._preserve_space[-1]:
+            data = data.strip()
+        if self._pending_start is not None:
+            args, self._pending_start = self._pending_start, None
+            qname_text = data if data and _looks_like_prefix_name(data) else None
+            self._start(*args, qname_text)
+            if qname_text is not None:
+                return
+        if data and self._root_seen:
+            self._write(_escape_cdata_c14n(data))
+
+    def start_ns(self, prefix, uri):
+        if self._ignored_depth:
+            return
+        # we may have to resolve qnames in text content
+        if self._data:
+            self._flush()
+        self._ns_stack[-1].append((uri, prefix))
+
+    def start(self, tag, attrs):
+        if self._exclude_tags is not None and (
+                self._ignored_depth or tag in self._exclude_tags):
+            self._ignored_depth += 1
+            return
+        if self._data:
+            self._flush()
+
+        new_namespaces = []
+        self._declared_ns_stack.append(new_namespaces)
+
+        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
+            # Need to parse text first to see if it requires a prefix declaration.
+            self._pending_start = (tag, attrs, new_namespaces)
+            return
+        self._start(tag, attrs, new_namespaces)
+
+    def _start(self, tag, attrs, new_namespaces, qname_text=None):
+        if self._exclude_attrs is not None and attrs:
+            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}
+
+        qnames = {tag, *attrs}
+        resolved_names = {}
+
+        # Resolve prefixes in attribute and tag text.
+        if qname_text is not None:
+            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
+            qnames.add(qname)
+        if self._find_qname_aware_attrs is not None and attrs:
+            qattrs = self._find_qname_aware_attrs(attrs)
+            if qattrs:
+                for attr_name in qattrs:
+                    value = attrs[attr_name]
+                    if _looks_like_prefix_name(value):
+                        qname = resolved_names[value] = self._resolve_prefix_name(value)
+                        qnames.add(qname)
+            else:
+                qattrs = None
+        else:
+            qattrs = None
+
+        # Assign prefixes in lexicographical order of used URIs.
+        parse_qname = self._qname
+        parsed_qnames = {n: parse_qname(n) for n in sorted(
+            qnames, key=lambda n: n.split('}', 1))}
+
+        # Write namespace declarations in prefix order ...
+        if new_namespaces:
+            attr_list = [
+                ('xmlns:' + prefix if prefix else 'xmlns', uri)
+                for uri, prefix in new_namespaces
+            ]
+            attr_list.sort()
+        else:
+            # almost always empty
+            attr_list = []
+
+        # ... followed by attributes in URI+name order
+        if attrs:
+            for k, v in sorted(attrs.items()):
+                if qattrs is not None and k in qattrs and v in resolved_names:
+                    v = parsed_qnames[resolved_names[v]][0]
+                attr_qname, attr_name, uri = parsed_qnames[k]
+                # No prefix for attributes in default ('') namespace.
+                attr_list.append((attr_qname if uri else attr_name, v))
+
+        # Honour xml:space attributes.
+        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
+        self._preserve_space.append(
+            space_behaviour == 'preserve' if space_behaviour
+            else self._preserve_space[-1])
+
+        # Write the tag.
+        write = self._write
+        write('<' + parsed_qnames[tag][0])
+        if attr_list:
+            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
+        write('>')
+
+        # Write the resolved qname text content.
+        if qname_text is not None:
+            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
+
+        self._root_seen = True
+        self._ns_stack.append([])
+
+    def end(self, tag):
+        if self._ignored_depth:
+            self._ignored_depth -= 1
+            return
+        if self._data:
+            self._flush()
+        self._write(f'</{self._qname(tag)[0]}>')
+        self._preserve_space.pop()
+        self._root_done = len(self._preserve_space) == 1
+        self._declared_ns_stack.pop()
+        self._ns_stack.pop()
+
+    def comment(self, text):
+        if not self._with_comments:
+            return
+        if self._ignored_depth:
+            return
+        if self._root_done:
+            self._write('\n')
+        elif self._root_seen and self._data:
+            self._flush()
+        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
+        if not self._root_seen:
+            self._write('\n')
+
+    def pi(self, target, data):
+        if self._ignored_depth:
+            return
+        if self._root_done:
+            self._write('\n')
+        elif self._root_seen and self._data:
+            self._flush()
+        self._write(
+            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
+        if not self._root_seen:
+            self._write('\n')
+
+
+def _escape_cdata_c14n(text):
+    # escape character data
+    try:
+        # it's worth avoiding do-nothing calls for strings that are
+        # shorter than 500 character, or so.  assume that's, by far,
+        # the most common case in most applications.
+        if '&' in text:
+            text = text.replace('&', '&amp;')
+        if '<' in text:
+            text = text.replace('<', '&lt;')
+        if '>' in text:
+            text = text.replace('>', '&gt;')
+        if '\r' in text:
+            text = text.replace('\r', '&#xD;')
+        return text
+    except (TypeError, AttributeError):
+        _raise_serialization_error(text)
+
+
+def _escape_attrib_c14n(text):
+    # escape attribute value
+    try:
+        if '&' in text:
+            text = text.replace('&', '&amp;')
+        if '<' in text:
+            text = text.replace('<', '&lt;')
+        if '"' in text:
+            text = text.replace('"', '&quot;')
+        if '\t' in text:
+            text = text.replace('\t', '&#x9;')
+        if '\n' in text:
+            text = text.replace('\n', '&#xA;')
+        if '\r' in text:
+            text = text.replace('\r', '&#xD;')
+        return text
+    except (TypeError, AttributeError):
+        _raise_serialization_error(text)
+
+
+# --------------------------------------------------------------------
+
 # Import the C accelerators
 try:
     # Element is going to be shadowed by the C implementation. We need to keep
diff --git a/Misc/NEWS.d/next/Library/2019-04-26-10-10-34.bpo-13611.XEF4bg.rst b/Misc/NEWS.d/next/Library/2019-04-26-10-10-34.bpo-13611.XEF4bg.rst
new file mode 100644 (file)
index 0000000..d01decb
--- /dev/null
@@ -0,0 +1,2 @@
+The xml.etree.ElementTree packages gained support for C14N 2.0 serialisation.
+Patch by Stefan Behnel.