Replaced the ELLIPSIS implementation with a worst-case linear-time one.

author Tim Peters <tim.peters@gmail.com>

Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)

committer Tim Peters <tim.peters@gmail.com>

Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)
author Tim Peters <tim.peters@gmail.com>
Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)
committer Tim Peters <tim.peters@gmail.com>
Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)
diff --git a/Lib/doctest.py b/Lib/doctest.py

index f7b29d3a50a33e34f49a2395cf04a2d188093b32..6c5679ddd0ee0f52239a1d85f950d19bca3e188c 100644 (file)
--- a/Lib/doctest.py
+++ b/Lib/doctest.py
@@ -362,6 +362,50 @@ class _SpoofOut(StringIO):
          if hasattr(self, "softspace"):
              del self.softspace
  
+# Worst-case linear-time ellipsis matching.
+def ellipsis_match(want, got):
+    if ELLIPSIS_MARKER not in want:
+        return want == got
+    # Remove \n from ...\n, else the newline will be required,
+    # and (for example) ... on a line by itself can't match
+    # nothing gracefully.
+    want = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
+    # Find "the real" strings.
+    ws = want.split(ELLIPSIS_MARKER)
+    assert len(ws) >= 2
+    # Match.  In general, we only need to find the leftmost non-overlapping
+    # match for each piece.  "Real strings" at the start or end of `want`
+    # are special cases.
+    w = ws[0]
+    if w:
+        # An ellipsis didn't start `want`.  We need to match exactly
+        # at the start.
+        if not got.startswith(w):
+            return False
+        pos = len(w)
+        del ws[0]
+    else:
+        pos = 0
+
+    for w in ws:
+        # w may be '' at times, if there are consecutive ellipses, or
+        # due to an ellipsis at the start or end of `want`.  That's OK.
+        # Search for an empty string succeeds, and doesn't change pos.
+        pos = got.find(w, pos)
+        if pos < 0:
+            return False
+        pos += len(w)
+
+    # If `want` ended with an ellipsis, the tail matches anything.
+    if ws[-1] == '':
+        return True
+    # Else `want` ended with a real string.  If the last real match
+    # exhausted `got`, we win.
+    if pos == len(got):
+        return True
+    # Else maybe we matched the last real string too early.
+    return got.endswith(ws[-1])
+
  ######################################################################
  ## 2. Example & DocTest
  ######################################################################
@@ -1475,22 +1519,9 @@ class OutputChecker:
                  return True
  
          # The ELLIPSIS flag says to let the sequence "..." in `want`
-        # match any substring in `got`.  We implement this by
-        # transforming `want` into a regular expression.
+        # match any substring in `got`.
          if optionflags & ELLIPSIS:
-            # Remove \n from ...\n, else the newline will be required,
-            # and (for example) ... on a line by itself can't match
-            # nothing gracefully.
-            want_re = want.replace(ELLIPSIS_MARKER + '\n', ELLIPSIS_MARKER)
-            # Escape any special regexp characters
-            want_re = re.escape(want_re)
-            # Replace escaped ellipsis markers ('\.\.\.') with .*
-            want_re = want_re.replace(re.escape(ELLIPSIS_MARKER), '.*')
-            # Require that it matches the entire string; and set the
-            # re.DOTALL flag (with '(?s)').
-            want_re = '(?s)^%s$' % want_re
-            # Check if the `want_re` regexp matches got.
-            if re.match(want_re, got):
+            if ellipsis_match(want, got):
                  return True
  
          # We didn't find any match; return false.
diff --git a/Lib/test/test_doctest.py b/Lib/test/test_doctest.py

index d49e6cff0cb67046e40e5607cdfe9a4bb38997d6..e96fd2ae640e47002996f5987752f6c1775e38a4 100644 (file)
--- a/Lib/test/test_doctest.py
+++ b/Lib/test/test_doctest.py
@@ -780,13 +780,8 @@ output to match any substring in the actual output:
      >>> doctest.DocTestRunner(verbose=False, optionflags=flags).run(test)
      (0, 1)
  
-... should also match nothing gracefully:
-XXX This can be provoked into requiring exponential time by adding more
-XXX ellipses; the implementation should change.  It's much easier to
-XXX provoke exponential time with expected output that doesn't match,
-XXX BTW (then multiple regexp .* thingies each try all possiblities,
-XXX multiplicatively, without hope of success).  That's the real danger,
-XXX that a failing test will appear to be hung.
+... should also match nothing gracefully (note that a regular-expression
+implementation of ELLIPSIS would take a loooong time to match this one!):
  
      >>> for i in range(100):
      ...     print i**2 #doctest: +ELLIPSIS
@@ -794,15 +789,32 @@ XXX that a failing test will appear to be hung.
      ...
      1
      ...
+    ......
+    ...
      36
      ...
      ...
+    ...
      49
      64
-    ......
+    .........
      9801
      ...
  
+... can be surprising; e.g., this test passes:
+
+    >>> for i in range(21): #doctest: +ELLIPSIS
+    ...     print i
+    0
+    1
+    2
+    ...
+    1
+    ...
+    2
+    ...
+    0
+
  The UNIFIED_DIFF flag causes failures that involve multi-line expected
  and actual outputs to be displayed using a unified diff:
author	Tim Peters <tim.peters@gmail.com>
	Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Thu, 19 Aug 2004 08:10:08 +0000 (08:10 +0000)
Lib/doctest.py		patch \| blob \| history
Lib/test/test_doctest.py		patch \| blob \| history