>>> tempdir = os.path.dirname(f) or os.curdir
>>> testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
-tokenize is broken on test_pep3131.py because regular expressions are broken on
-the obscure unicode identifiers in it. *sigh*
+Tokenize is broken on test_pep3131.py because regular expressions are
+broken on the obscure unicode identifiers in it. *sigh*
+With roundtrip extended to test the 5-tuple mode of untokenize,
+7 more testfiles fail. Remove them also until the failure is diagnosed.
+
>>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py"))
+ >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
+ ... testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
+ ...
>>> if not support.is_resource_enabled("cpu"):
... testfiles = random.sample(testfiles, 10)
...
def roundtrip(f):
"""
Test roundtrip for `untokenize`. `f` is an open file or a string.
- The source code in f is tokenized, converted back to source code via
- tokenize.untokenize(), and tokenized again from the latter. The test
- fails if the second tokenization doesn't match the first.
+ The source code in f is tokenized to both 5- and 2-tuples.
+ Both sequences are converted back to source code via
+ tokenize.untokenize(), and the latter tokenized again to 2-tuples.
+ The test fails if the 3 pair tokenizations do not match.
+
+ When untokenize bugs are fixed, untokenize with 5-tuples should
+ reproduce code that does not contain a backslash continuation
+ following spaces. A proper test should test this.
+
+ This function would be more useful for correcting bugs if it reported
+ the first point of failure, like assertEqual, rather than just
+ returning False -- or if it were only used in unittests and not
+ doctest and actually used assertEqual.
"""
+ # Get source code and original tokenizations
if isinstance(f, str):
- f = BytesIO(f.encode('utf-8'))
- try:
- token_list = list(tokenize(f.readline))
- finally:
+ code = f.encode('utf-8')
+ else:
+ code = f.read()
f.close()
- tokens1 = [tok[:2] for tok in token_list]
- new_bytes = untokenize(tokens1)
- readline = (line for line in new_bytes.splitlines(keepends=True)).__next__
- tokens2 = [tok[:2] for tok in tokenize(readline)]
- return tokens1 == tokens2
+ readline = iter(code.splitlines(keepends=True)).__next__
+ tokens5 = list(tokenize(readline))
+ tokens2 = [tok[:2] for tok in tokens5]
+ # Reproduce tokens2 from pairs
+ bytes_from2 = untokenize(tokens2)
+ readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
+ tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
+ # Reproduce tokens2 from 5-tuples
+ bytes_from5 = untokenize(tokens5)
+ readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
+ tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
+ # Compare 3 versions
+ return tokens2 == tokens2_from2 == tokens2_from5
# This is an example from the docs, set up as a doctest.
def decistmt(s):