granicus.if.org Git - python/commitdiff
Issue #2052: Add charset parameter to HtmlDiff.make_file().
author: Berker Peksag <berker.peksag@gmail.com>
Sat, 14 Mar 2015 23:18:47 +0000 (01:18 +0200)
committer: Berker Peksag <berker.peksag@gmail.com>
Sat, 14 Mar 2015 23:18:47 +0000 (01:18 +0200)
Doc/library/difflib.rst
Doc/whatsnew/3.5.rst
Lib/difflib.py
Lib/test/test_difflib.py
Lib/test/test_difflib_expect.html
Misc/NEWS

index 329bde0af88c3cad58cb117396ba033cb8f8442b..442706556e0765e9385372aef48e5e9d7d6199d5 100644 (file)
@@ -104,7 +104,8 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
 
    The following methods are public:
 
-   .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
+   .. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, \
+                         numlines=5, *, charset='utf-8')
 
       Compares *fromlines* and *tolines* (lists of strings) and returns a string which
       is a complete HTML file containing a table showing line by line differences with
@@ -123,6 +124,10 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
       the next difference highlight at the top of the browser without any leading
       context).
 
+      .. versionchanged:: 3.5
+         *charset* keyword-only argument was added.  The default charset of
+         HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
+
    .. method:: make_table(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
 
       Compares *fromlines* and *tolines* (lists of strings) and returns a string which
index 2f7984812ecf5f44128665f126df227fcb9e2a0e..21fafd0dffb45a835f9abcf52d6f19bc4ee5bd39 100644 (file)
@@ -225,6 +225,14 @@ contextlib
   don't provide any options to redirect it.
   (Contributed by Berker Peksag in :issue:`22389`.)
 
+difflib
+-------
+
+* The charset of the HTML document generated by :meth:`difflib.HtmlDiff.make_file`
+  can now be customized by using *charset* keyword-only parameter.  The default
+  charset of HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
+  (Contributed by Berker Peksag in :issue:`2052`.)
+
 distutils
 ---------
 
index ae3479d3d85e00cb839cd9b50e1dda90e995584f..758f1aad0a2e656af337ed858edde46c2b91bb9a 100644 (file)
@@ -1598,7 +1598,7 @@ _file_template = """
 
 <head>
     <meta http-equiv="Content-Type"
-          content="text/html; charset=ISO-8859-1" />
+          content="text/html; charset=%(charset)s" />
     <title></title>
     <style type="text/css">%(styles)s
     </style>
@@ -1685,8 +1685,8 @@ class HtmlDiff(object):
         self._linejunk = linejunk
         self._charjunk = charjunk
 
-    def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
-                  numlines=5):
+    def make_file(self, fromlines, tolines, fromdesc='', todesc='',
+                  context=False, numlines=5, *, charset='utf-8'):
         """Returns HTML file of side by side comparison with change highlights
 
         Arguments:
@@ -1701,13 +1701,16 @@ class HtmlDiff(object):
             When context is False, controls the number of lines to place
             the "next" link anchors before the next change (so click of
             "next" link jumps to just before the change).
+        charset -- charset of the HTML document
         """
 
-        return self._file_template % dict(
-            styles = self._styles,
-            legend = self._legend,
-            table = self.make_table(fromlines,tolines,fromdesc,todesc,
-                                    context=context,numlines=numlines))
+        return (self._file_template % dict(
+            styles=self._styles,
+            legend=self._legend,
+            table=self.make_table(fromlines, tolines, fromdesc, todesc,
+                                  context=context, numlines=numlines),
+            charset=charset
+        )).encode(charset, 'xmlcharrefreplace').decode(charset)
 
     def _tab_newline_replace(self,fromlines,tolines):
         """Returns from/to line lists with tabs expanded and newlines removed.
index 0ba8f0e05bcf471668cbe4994a6d94fb8114a0ed..a078e71fd847bd6f4ceb35672a01fbd42f826a13 100644 (file)
@@ -107,6 +107,20 @@ patch914575_to1 = """
    5. Flat is better than nested.
 """
 
+patch914575_nonascii_from1 = """
+   1. Beautiful is beTTer than ugly.
+   2. Explicit is better than ımplıcıt.
+   3. Simple is better than complex.
+   4. Complex is better than complicated.
+"""
+
+patch914575_nonascii_to1 = """
+   1. Beautiful is better than ügly.
+   3.   Sımple is better than complex.
+   4. Complicated is better than cömplex.
+   5. Flat is better than nested.
+"""
+
 patch914575_from2 = """
 \t\tLine 1: preceeded by from:[tt] to:[ssss]
   \t\tLine 2: preceeded by from:[sstt] to:[sssst]
@@ -223,6 +237,27 @@ class TestSFpatches(unittest.TestCase):
         new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
         difflib.SequenceMatcher(None, old, new).get_opcodes()
 
+    def test_make_file_default_charset(self):
+        html_diff = difflib.HtmlDiff()
+        output = html_diff.make_file(patch914575_from1.splitlines(),
+                                     patch914575_to1.splitlines())
+        self.assertIn('content="text/html; charset=utf-8"', output)
+
+    def test_make_file_iso88591_charset(self):
+        html_diff = difflib.HtmlDiff()
+        output = html_diff.make_file(patch914575_from1.splitlines(),
+                                     patch914575_to1.splitlines(),
+                                     charset='iso-8859-1')
+        self.assertIn('content="text/html; charset=iso-8859-1"', output)
+
+    def test_make_file_usascii_charset_with_nonascii_input(self):
+        html_diff = difflib.HtmlDiff()
+        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
+                                     patch914575_nonascii_to1.splitlines(),
+                                     charset='us-ascii')
+        self.assertIn('content="text/html; charset=us-ascii"', output)
+        self.assertIn('&#305;mpl&#305;c&#305;t', output)
+
 
 class TestOutputFormat(unittest.TestCase):
     def test_tab_delimiter(self):
index 71b6d7a8620dafc0384c225ec1f075beab70bb47..ea7a24ef4beb283f9fb5342ce77bb7a0ab6bcf0c 100644 (file)
@@ -6,7 +6,7 @@
 
 <head>
     <meta http-equiv="Content-Type"
-          content="text/html; charset=ISO-8859-1" />
+          content="text/html; charset=utf-8" />
     <title></title>
     <style type="text/css">
         table.diff {font-family:Courier; border:medium;}
index 35265e1992c1f0873c4a81cc736aebaa4e971fc6..c73dd17ea397bd9cbfc4154bb3f443aab855e820 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -18,6 +18,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #2052: Add charset parameter to HtmlDiff.make_file().
+
 - Issue #23138: Fixed parsing cookies with absent keys or values in cookiejar.
   Patch by Demian Brecht.