From: Serhiy Storchaka
Date: Tue, 6 Oct 2015 15:23:12 +0000 (+0300)
Subject: Issue #25317: Converted doctests in test_tokenize to unittests.
X-Git-Tag: v3.5.1rc1~207^2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6f5175de154c9a13db2d8e157239a6cbc14e5001;p=python

Issue #25317: Converted doctests in test_tokenize to unittests.
Made test_tokenize discoverable.
---

6f5175de154c9a13db2d8e157239a6cbc14e5001
diff --cc Lib/test/test_tokenize.py
index b7ca08949a,40b0c90dde..3b17ca6329
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@@ -463,11 -420,11 +420,11 @@@ def k(x)
  OP '[' (1, 34) (1, 35)
  NUMBER '5' (1, 35) (1, 36)
  OP ']' (1, 36) (1, 37)
+ """)
- Multiplicative
-
- >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_multiplicative(self):
+ # Multiplicative
- self.check_tokenize("x = 1//1*1/5*12%0x12", """\
++ self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
  NAME 'x' (1, 0) (1, 1)
  OP '=' (1, 2) (1, 3)
  NUMBER '1' (1, 4) (1, 5)
@@@ -481,13 -438,11 +438,13 @@@
  NUMBER '12' (1, 13) (1, 15)
  OP '%' (1, 15) (1, 16)
  NUMBER '0x12' (1, 16) (1, 20)
+ OP '@' (1, 20) (1, 21)
+ NUMBER '42' (1, 21) (1, 23)
+ """)
- Unary
-
- >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_unary(self):
+ # Unary
+ self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
  OP '~' (1, 0) (1, 1)
  NUMBER '1' (1, 1) (1, 2)
  OP '^' (1, 3) (1, 4)
@@@ -643,359 -559,10 +561,284 @@@
  NAME 'grün' (2, 0) (2, 4)
  OP '=' (2, 5) (2, 6)
  STRING "U'green'" (2, 7) (2, 15)
+ """)
- Async/await extension:
-
- >>> dump_tokens("async = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def test_async(self):
++ # Async/await extension:
++ self.check_tokenize("async = 1", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("a = (async = 1)")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("a = (async = 1)", """\
+ NAME 'a' (1, 0) (1, 1)
+ OP '=' (1, 2) (1, 3)
+ OP '(' (1, 4) (1, 5)
+ NAME 'async' (1, 5) (1, 10)
+ OP '=' (1, 11) (1, 12)
+ NUMBER '1' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
++ """)
+
- >>> dump_tokens("async()")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async()", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
++ """)
+
- >>> dump_tokens("class async(Bar):pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async(Bar):pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ NAME 'Bar' (1, 12) (1, 15)
+ OP ')' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 17) (1, 21)
++ """)
+
- >>> dump_tokens("class async:pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async:pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP ':' (1, 11) (1, 12)
+ NAME 'pass' (1, 12) (1, 16)
++ """)
+
- >>> dump_tokens("await = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("await = 1", """\
+ NAME 'await' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("foo.async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
++ """)
+
- >>> dump_tokens("async for a in b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async for a in b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'for' (1, 6) (1, 9)
+ NAME 'a' (1, 10) (1, 11)
+ NAME 'in' (1, 12) (1, 14)
+ NAME 'b' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 18) (1, 22)
++ """)
+
- >>> dump_tokens("async with a as b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async with a as b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'with' (1, 6) (1, 10)
+ NAME 'a' (1, 11) (1, 12)
+ NAME 'as' (1, 13) (1, 15)
+ NAME 'b' (1, 16) (1, 17)
+ OP ':' (1, 17) (1, 18)
+ NAME 'pass' (1, 19) (1, 23)
++ """)
+
- >>> dump_tokens("async.foo")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async.foo", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '.' (1, 5) (1, 6)
+ NAME 'foo' (1, 6) (1, 9)
++ """)
+
- >>> dump_tokens("async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async", """\
+ NAME 'async' (1, 0) (1, 5)
++ """)
+
- >>> dump_tokens("async\\n#comment\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n#comment\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ COMMENT '#comment' (2, 0) (2, 8)
+ NL '\\n' (2, 8) (2, 9)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\n...\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n...\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ OP '...' (2, 0) (2, 3)
+ NEWLINE '\\n' (2, 3) (2, 4)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ NAME 'await' (2, 0) (2, 5)
++ """)
+
- >>> dump_tokens("foo.async + 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async + 1", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
+ OP '+' (1, 10) (1, 11)
+ NUMBER '1' (1, 12) (1, 13)
++ """)
+
- >>> dump_tokens("async def foo(): pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async def foo(): pass", """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NAME 'pass' (1, 17) (1, 21)
-
- >>> dump_tokens('''async def foo():
- ... def foo(await):
- ... await = 1
- ... if 1:
- ... await
- ... async += 1
- ... ''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ """)
++
++ self.check_tokenize('''\
++async def foo():
++ def foo(await):
++ await = 1
++ if 1:
++ await
++async += 1
++''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ NAME 'def' (2, 2) (2, 5)
+ NAME 'foo' (2, 6) (2, 9)
+ OP '(' (2, 9) (2, 10)
+ AWAIT 'await' (2, 10) (2, 15)
+ OP ')' (2, 15) (2, 16)
+ OP ':' (2, 16) (2, 17)
+ NEWLINE '\\n' (2, 17) (2, 18)
+ INDENT ' ' (3, 0) (3, 4)
+ AWAIT 'await' (3, 4) (3, 9)
+ OP '=' (3, 10) (3, 11)
+ NUMBER '1' (3, 12) (3, 13)
+ NEWLINE '\\n' (3, 13) (3, 14)
+ DEDENT '' (4, 2) (4, 2)
+ NAME 'if' (4, 2) (4, 4)
+ NUMBER '1' (4, 5) (4, 6)
+ OP ':' (4, 6) (4, 7)
+ NEWLINE '\\n' (4, 7) (4, 8)
+ INDENT ' ' (5, 0) (5, 4)
+ AWAIT 'await' (5, 4) (5, 9)
+ NEWLINE '\\n' (5, 9) (5, 10)
+ DEDENT '' (6, 0) (6, 0)
+ DEDENT '' (6, 0) (6, 0)
+ NAME 'async' (6, 0) (6, 5)
+ OP '+=' (6, 6) (6, 8)
+ NUMBER '1' (6, 9) (6, 10)
+ NEWLINE '\\n' (6, 10) (6, 11)
++ """)
+
- >>> dump_tokens('''async def foo():
- ... async for i in 1: pass''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''\
++async def foo():
++ async for i in 1: pass''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ ASYNC 'async' (2, 2) (2, 7)
+ NAME 'for' (2, 8) (2, 11)
+ NAME 'i' (2, 12) (2, 13)
+ NAME 'in' (2, 14) (2, 16)
+ NUMBER '1' (2, 17) (2, 18)
+ OP ':' (2, 18) (2, 19)
+ NAME 'pass' (2, 20) (2, 24)
+ DEDENT '' (3, 0) (3, 0)
++ """)
+
- >>> dump_tokens('''async def foo(async): await''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''async def foo(async): await''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ ASYNC 'async' (1, 14) (1, 19)
+ OP ')' (1, 19) (1, 20)
+ OP ':' (1, 20) (1, 21)
+ AWAIT 'await' (1, 22) (1, 27)
++ """)
++
++ self.check_tokenize('''\
++def f():
+
- >>> dump_tokens('''def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def baz(): pass
++ async def bar(): pass
++
++ await = 2''', """\
+ NAME 'def' (1, 0) (1, 3)
+ NAME 'f' (1, 4) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
+ OP ':' (1, 7) (1, 8)
+ NEWLINE '\\n' (1, 8) (1, 9)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ NAME 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
++ """)
++
++ self.check_tokenize('''\
++async def f():
++
++ def baz(): pass
++ async def bar(): pass
+
- >>> dump_tokens('''async def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ await = 2''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'f' (1, 10) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ OP ')' (1, 12) (1, 13)
+ OP ':' (1, 13) (1, 14)
+ NEWLINE '\\n' (1, 14) (1, 15)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ AWAIT 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
- """
-
- from test import support
- from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
- STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
- open as tokenize_open, Untokenizer)
- from io import BytesIO
- from unittest import TestCase, mock
- import os
- import token
++ """)
+
- def dump_tokens(s):
- """Print out the tokens in s in a table format.
- The ENDMARKER is omitted.
- """
- f = BytesIO(s.encode('utf-8'))
- for type, token, start, end, line in tokenize(f.readline):
- if type == ENDMARKER:
- break
- type = tok_name[type]
- print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
-
- def roundtrip(f):
- """
- Test roundtrip for `untokenize`. `f` is an open file or a string.
- The source code in f is tokenized to both 5- and 2-tuples.
- Both sequences are converted back to source code via
- tokenize.untokenize(), and the latter tokenized again to 2-tuples.
- The test fails if the 3 pair tokenizations do not match.
-
- When untokenize bugs are fixed, untokenize with 5-tuples should
- reproduce code that does not contain a backslash continuation
- following spaces. A proper test should test this.
-
- This function would be more useful for correcting bugs if it reported
- the first point of failure, like assertEqual, rather than just
- returning False -- or if it were only used in unittests and not
- doctest and actually used assertEqual.
- """
- # Get source code and original tokenizations
- if isinstance(f, str):
- code = f.encode('utf-8')
- else:
- code = f.read()
- f.close()
- readline = iter(code.splitlines(keepends=True)).__next__
- tokens5 = list(tokenize(readline))
- tokens2 = [tok[:2] for tok in tokens5]
- # Reproduce tokens2 from pairs
- bytes_from2 = untokenize(tokens2)
- readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
- tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
- # Reproduce tokens2 from 5-tuples
- bytes_from5 = untokenize(tokens5)
- readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
- tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
- # Compare 3 versions
- return tokens2 == tokens2_from2 == tokens2_from5
-
- # This is an example from the docs, set up as a doctest.
  def decistmt(s):
- """Substitute Decimals for floats in a string of statements.
-
- >>> from decimal import Decimal
- >>> s = 'print(+21.3e-5*-.1234/81.7)'
- >>> decistmt(s)
- "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"
-
- The format of the exponent is inherited from the platform C library.
- Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
- we're only showing 11 digits, and the 12th isn't close to 5, the
- rest of the output should be platform-independent.
-
- >>> exec(s) #doctest: +ELLIPSIS
- -3.2171603427...e-0...7
-
- Output from calculations with Decimal should be identical across all
- platforms.
-
- >>> exec(decistmt(s))
- -3.217160342717258261933904529E-7
- """
  result = []
  g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
  for toknum, tokval, _, _, _ in g:
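
Note: the converted tests call a check_tokenize() helper that is defined elsewhere in the
modified Lib/test/test_tokenize.py and does not appear in this excerpt of the diff. Judging
from the removed dump_tokens() helper above (which printed one "TYPE token_repr start end"
line per token and stopped at ENDMARKER), a helper with roughly the following shape would
make the new test methods work. This is only a sketch: the class name TokenizeTest, the
whitespace-insensitive comparison, and the exact formatting are assumptions, not the
committed implementation.

from io import BytesIO
from tokenize import tokenize, tok_name, ENCODING, ENDMARKER
from unittest import TestCase


class TokenizeTest(TestCase):  # hypothetical container for the converted tests

    def check_tokenize(self, s, expected):
        # Tokenize s and render each token as "TYPE token_repr start end",
        # mirroring the format the removed dump_tokens() printed.  The
        # leading ENCODING token and the trailing ENDMARKER are skipped,
        # which is why they no longer appear in the expected dumps above.
        result = []
        readline = BytesIO(s.encode('utf-8')).readline
        for type, token, start, end, line in tokenize(readline):
            if type == ENDMARKER:
                break
            if type == ENCODING:
                continue
            result.append("%s %r %s %s" % (tok_name[type], token, start, end))

        def normalize(line):
            # Collapse runs of whitespace so column alignment in the
            # expected dump is irrelevant to the comparison.
            return " ".join(line.split())

        self.assertEqual([normalize(line) for line in result],
                         [normalize(line) for line in expected.splitlines()
                          if line.strip()])

Each converted test method then simply passes a source fragment and its expected token
dump, exactly as in the hunks above, e.g. self.check_tokenize("async = 1", """\ ... """),
and a failure is reported by assertEqual with the first differing line, which is the main
advantage over the old doctest-based dump_tokens() output comparison.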