Fix difflib `?` hint in diff output when dealing with tabs (#15201)
author: Anthony Sottile <asottile@umich.edu>
Wed, 21 Aug 2019 18:59:26 +0000 (11:59 -0700)
committer: Tim Peters <tim.peters@gmail.com>
Wed, 21 Aug 2019 18:59:25 +0000 (13:59 -0500)
Lib/difflib.py
Lib/test/test_difflib.py
Misc/NEWS.d/next/Library/2019-08-10-12-33-27.bpo-37810.d4zbvB.rst [new file with mode: 0644]

diff --git a/Lib/difflib.py b/Lib/difflib.py
index 887c3c26cae4588cdfcfb922e3c2af90cb4a64b3..3de1b3d0fcdfee0358482ce0932c278d925bb111 100644 (file)
@@ -733,20 +733,15 @@ def get_close_matches(word, possibilities, n=3, cutoff=0.6):
     # Strip scores for the best n matches
     return [x for score, x in result]
 
-def _count_leading(line, ch):
-    """
-    Return number of `ch` characters at the start of `line`.
 
-    Example:
+def _keep_original_ws(s, tag_s):
+    """Replace whitespace with the original whitespace characters in `s`"""
+    return ''.join(
+        c if tag_c == " " and c.isspace() else tag_c
+        for c, tag_c in zip(s, tag_s)
+    )
 
-    >>> _count_leading('   abc', ' ')
-    3
-    """
 
-    i, n = 0, len(line)
-    while i < n and line[i] == ch:
-        i += 1
-    return i
 
 class Differ:
     r"""
@@ -1033,7 +1028,7 @@ class Differ:
 
     def _qformat(self, aline, bline, atags, btags):
         r"""
-        Format "?" output and deal with leading tabs.
+        Format "?" output and deal with tabs.
 
         Example:
 
@@ -1047,22 +1042,16 @@ class Differ:
         '+ \tabcdefGhijkl\n'
         '? \t ^ ^  ^\n'
         """
-
-        # Can hurt, but will probably help most of the time.
-        common = min(_count_leading(aline, "\t"),
-                     _count_leading(bline, "\t"))
-        common = min(common, _count_leading(atags[:common], " "))
-        common = min(common, _count_leading(btags[:common], " "))
-        atags = atags[common:].rstrip()
-        btags = btags[common:].rstrip()
+        atags = _keep_original_ws(aline, atags).rstrip()
+        btags = _keep_original_ws(bline, btags).rstrip()
 
         yield "- " + aline
         if atags:
-            yield "? %s%s\n" % ("\t" * common, atags)
+            yield f"? {atags}\n"
 
         yield "+ " + bline
         if btags:
-            yield "? %s%s\n" % ("\t" * common, btags)
+            yield f"? {btags}\n"
 
 # With respect to junk, an earlier version of ndiff simply refused to
 # *start* a match with a junk element.  The result was cases like this:
diff --git a/Lib/test/test_difflib.py b/Lib/test/test_difflib.py
index 745ccbd6659ed5a3df435daa5b3b2857a85fdbf4..5e2ca1a23b928ba41e467896da341c83993a9906 100644 (file)
@@ -89,10 +89,16 @@ class TestSFbugs(unittest.TestCase):
         # Check fix for bug #1488943
         diff = list(difflib.Differ().compare(["\tI am a buggy"],["\t\tI am a bug"]))
         self.assertEqual("- \tI am a buggy", diff[0])
-        self.assertEqual("?            --\n", diff[1])
+        self.assertEqual("? \t          --\n", diff[1])
         self.assertEqual("+ \t\tI am a bug", diff[2])
         self.assertEqual("? +\n", diff[3])
 
+    def test_hint_indented_properly_with_tabs(self):
+        diff = list(difflib.Differ().compare(["\t \t \t^"], ["\t \t \t^\n"]))
+        self.assertEqual("- \t \t \t^", diff[0])
+        self.assertEqual("+ \t \t \t^\n", diff[1])
+        self.assertEqual("? \t \t \t +\n", diff[2])
+
     def test_mdiff_catch_stop_iteration(self):
         # Issue #33224
         self.assertEqual(
diff --git a/Misc/NEWS.d/next/Library/2019-08-10-12-33-27.bpo-37810.d4zbvB.rst b/Misc/NEWS.d/next/Library/2019-08-10-12-33-27.bpo-37810.d4zbvB.rst
new file mode 100644 (file)
index 0000000..3e0b317
--- /dev/null
@@ -0,0 +1,2 @@
+Fix :mod:`difflib` ``?`` hint in diff output when dealing with tabs.  Patch
+by Anthony Sottile.