Issue #20750, Enable roundtrip tests for new 5-tuple untokenize. The

author Terry Jan Reedy <tjreedy@udel.edu>

Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)

committer Terry Jan Reedy <tjreedy@udel.edu>

Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)
author Terry Jan Reedy <tjreedy@udel.edu>
Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)
committer Terry Jan Reedy <tjreedy@udel.edu>
Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py

index 7008d0e46fd9af6927944ca2b597effda5e928ab..7a472b21f37bff5c453d0efcbf634f182046b7cd 100644 (file)
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -578,9 +578,15 @@ pass the '-ucpu' option to process the full directory.
      >>> tempdir = os.path.dirname(f) or os.curdir
      >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
  
-tokenize is broken on test_pep3131.py because regular expressions are broken on
-the obscure unicode identifiers in it. *sigh*
+Tokenize is broken on test_pep3131.py because regular expressions are
+broken on the obscure unicode identifiers in it. *sigh*
+With roundtrip extended to test the 5-tuple mode of  untokenize,
+7 more testfiles fail.  Remove them also until the failure is diagnosed.
+
      >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
+    >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
+    ...     testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
+    ...
      >>> if not support.is_resource_enabled("cpu"):
      ...     testfiles = random.sample(testfiles, 10)
      ...
@@ -659,21 +665,39 @@ def dump_tokens(s):
  def roundtrip(f):
      """
      Test roundtrip for `untokenize`. `f` is an open file or a string.
-    The source code in f is tokenized, converted back to source code via
-    tokenize.untokenize(), and tokenized again from the latter. The test
-    fails if the second tokenization doesn't match the first.
+    The source code in f is tokenized to both 5- and 2-tuples.
+    Both sequences are converted back to source code via
+    tokenize.untokenize(), and the latter tokenized again to 2-tuples.
+    The test fails if the 3 pair tokenizations do not match.
+
+    When untokenize bugs are fixed, untokenize with 5-tuples should
+    reproduce code that does not contain a backslash continuation
+    following spaces.  A proper test should test this.
+
+    This function would be more useful for correcting bugs if it reported
+    the first point of failure, like assertEqual, rather than just
+    returning False -- or if it were only used in unittests and not
+    doctest and actually used assertEqual.
      """
+    # Get source code and original tokenizations
      if isinstance(f, str):
-        f = BytesIO(f.encode('utf-8'))
-    try:
-        token_list = list(tokenize(f.readline))
-    finally:
+        code = f.encode('utf-8')
+    else:
+        code = f.read()
          f.close()
-    tokens1 = [tok[:2] for tok in token_list]
-    new_bytes = untokenize(tokens1)
-    readline = (line for line in new_bytes.splitlines(keepends=True)).__next__
-    tokens2 = [tok[:2] for tok in tokenize(readline)]
-    return tokens1 == tokens2
+    readline = iter(code.splitlines(keepends=True)).__next__
+    tokens5 = list(tokenize(readline))
+    tokens2 = [tok[:2] for tok in tokens5]
+    # Reproduce tokens2 from pairs
+    bytes_from2 = untokenize(tokens2)
+    readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
+    tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
+    # Reproduce tokens2 from 5-tuples
+    bytes_from5 = untokenize(tokens5)
+    readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
+    tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
+    # Compare 3 versions
+    return tokens2 == tokens2_from2 == tokens2_from5
  
  # This is an example from the docs, set up as a doctest.
  def decistmt(s):
author	Terry Jan Reedy <tjreedy@udel.edu>
	Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)
committer	Terry Jan Reedy <tjreedy@udel.edu>
	Sun, 23 Feb 2014 23:00:31 +0000 (18:00 -0500)