OP '[' (1, 34) (1, 35)
NUMBER '5' (1, 35) (1, 36)
OP ']' (1, 36) (1, 37)
+ """)
- Multiplicative
-
- >>> dump_tokens("x = 1//1*1/5*12%0x12@42")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_multiplicative(self):
+ # Multiplicative
- self.check_tokenize("x = 1//1*1/5*12%0x12", """\
++ self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
NAME 'x' (1, 0) (1, 1)
OP '=' (1, 2) (1, 3)
NUMBER '1' (1, 4) (1, 5)
NUMBER '12' (1, 13) (1, 15)
OP '%' (1, 15) (1, 16)
NUMBER '0x12' (1, 16) (1, 20)
+ OP '@' (1, 20) (1, 21)
+ NUMBER '42' (1, 21) (1, 23)
+ """)
- Unary
-
- >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1")
- ENCODING 'utf-8' (0, 0) (0, 0)
+ def test_unary(self):
+ # Unary
+ self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
OP '~' (1, 0) (1, 1)
NUMBER '1' (1, 1) (1, 2)
OP '^' (1, 3) (1, 4)
NAME 'grün' (2, 0) (2, 4)
OP '=' (2, 5) (2, 6)
STRING "U'green'" (2, 7) (2, 15)
+ """)
- Async/await extension:
-
- >>> dump_tokens("async = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def test_async(self):
++ # Async/await extension:
++ self.check_tokenize("async = 1", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("a = (async = 1)")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("a = (async = 1)", """\
+ NAME 'a' (1, 0) (1, 1)
+ OP '=' (1, 2) (1, 3)
+ OP '(' (1, 4) (1, 5)
+ NAME 'async' (1, 5) (1, 10)
+ OP '=' (1, 11) (1, 12)
+ NUMBER '1' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
++ """)
+
- >>> dump_tokens("async()")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async()", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
++ """)
+
- >>> dump_tokens("class async(Bar):pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async(Bar):pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ NAME 'Bar' (1, 12) (1, 15)
+ OP ')' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 17) (1, 21)
++ """)
+
- >>> dump_tokens("class async:pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("class async:pass", """\
+ NAME 'class' (1, 0) (1, 5)
+ NAME 'async' (1, 6) (1, 11)
+ OP ':' (1, 11) (1, 12)
+ NAME 'pass' (1, 12) (1, 16)
++ """)
+
- >>> dump_tokens("await = 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("await = 1", """\
+ NAME 'await' (1, 0) (1, 5)
+ OP '=' (1, 6) (1, 7)
+ NUMBER '1' (1, 8) (1, 9)
++ """)
+
- >>> dump_tokens("foo.async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
++ """)
+
- >>> dump_tokens("async for a in b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async for a in b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'for' (1, 6) (1, 9)
+ NAME 'a' (1, 10) (1, 11)
+ NAME 'in' (1, 12) (1, 14)
+ NAME 'b' (1, 15) (1, 16)
+ OP ':' (1, 16) (1, 17)
+ NAME 'pass' (1, 18) (1, 22)
++ """)
+
- >>> dump_tokens("async with a as b: pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async with a as b: pass", """\
+ NAME 'async' (1, 0) (1, 5)
+ NAME 'with' (1, 6) (1, 10)
+ NAME 'a' (1, 11) (1, 12)
+ NAME 'as' (1, 13) (1, 15)
+ NAME 'b' (1, 16) (1, 17)
+ OP ':' (1, 17) (1, 18)
+ NAME 'pass' (1, 19) (1, 23)
++ """)
+
- >>> dump_tokens("async.foo")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async.foo", """\
+ NAME 'async' (1, 0) (1, 5)
+ OP '.' (1, 5) (1, 6)
+ NAME 'foo' (1, 6) (1, 9)
++ """)
+
- >>> dump_tokens("async")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async", """\
+ NAME 'async' (1, 0) (1, 5)
++ """)
+
- >>> dump_tokens("async\\n#comment\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n#comment\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ COMMENT '#comment' (2, 0) (2, 8)
+ NL '\\n' (2, 8) (2, 9)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\n...\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\n...\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ OP '...' (2, 0) (2, 3)
+ NEWLINE '\\n' (2, 3) (2, 4)
+ NAME 'await' (3, 0) (3, 5)
++ """)
+
- >>> dump_tokens("async\\nawait")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async\nawait", """\
+ NAME 'async' (1, 0) (1, 5)
+ NEWLINE '\\n' (1, 5) (1, 6)
+ NAME 'await' (2, 0) (2, 5)
++ """)
+
- >>> dump_tokens("foo.async + 1")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("foo.async + 1", """\
+ NAME 'foo' (1, 0) (1, 3)
+ OP '.' (1, 3) (1, 4)
+ NAME 'async' (1, 4) (1, 9)
+ OP '+' (1, 10) (1, 11)
+ NUMBER '1' (1, 12) (1, 13)
++ """)
+
- >>> dump_tokens("async def foo(): pass")
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize("async def foo(): pass", """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NAME 'pass' (1, 17) (1, 21)
-
- >>> dump_tokens('''async def foo():
- ... def foo(await):
- ... await = 1
- ... if 1:
- ... await
- ... async += 1
- ... ''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ """)
++
++ self.check_tokenize('''\
++async def foo():
++ def foo(await):
++ await = 1
++ if 1:
++ await
++async += 1
++''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ NAME 'def' (2, 2) (2, 5)
+ NAME 'foo' (2, 6) (2, 9)
+ OP '(' (2, 9) (2, 10)
+ AWAIT 'await' (2, 10) (2, 15)
+ OP ')' (2, 15) (2, 16)
+ OP ':' (2, 16) (2, 17)
+ NEWLINE '\\n' (2, 17) (2, 18)
+ INDENT ' ' (3, 0) (3, 4)
+ AWAIT 'await' (3, 4) (3, 9)
+ OP '=' (3, 10) (3, 11)
+ NUMBER '1' (3, 12) (3, 13)
+ NEWLINE '\\n' (3, 13) (3, 14)
+ DEDENT '' (4, 2) (4, 2)
+ NAME 'if' (4, 2) (4, 4)
+ NUMBER '1' (4, 5) (4, 6)
+ OP ':' (4, 6) (4, 7)
+ NEWLINE '\\n' (4, 7) (4, 8)
+ INDENT ' ' (5, 0) (5, 4)
+ AWAIT 'await' (5, 4) (5, 9)
+ NEWLINE '\\n' (5, 9) (5, 10)
+ DEDENT '' (6, 0) (6, 0)
+ DEDENT '' (6, 0) (6, 0)
+ NAME 'async' (6, 0) (6, 5)
+ OP '+=' (6, 6) (6, 8)
+ NUMBER '1' (6, 9) (6, 10)
+ NEWLINE '\\n' (6, 10) (6, 11)
++ """)
+
- >>> dump_tokens('''async def foo():
- ... async for i in 1: pass''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''\
++async def foo():
++ async for i in 1: pass''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ OP ')' (1, 14) (1, 15)
+ OP ':' (1, 15) (1, 16)
+ NEWLINE '\\n' (1, 16) (1, 17)
+ INDENT ' ' (2, 0) (2, 2)
+ ASYNC 'async' (2, 2) (2, 7)
+ NAME 'for' (2, 8) (2, 11)
+ NAME 'i' (2, 12) (2, 13)
+ NAME 'in' (2, 14) (2, 16)
+ NUMBER '1' (2, 17) (2, 18)
+ OP ':' (2, 18) (2, 19)
+ NAME 'pass' (2, 20) (2, 24)
+ DEDENT '' (3, 0) (3, 0)
++ """)
+
- >>> dump_tokens('''async def foo(async): await''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ self.check_tokenize('''async def foo(async): await''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'foo' (1, 10) (1, 13)
+ OP '(' (1, 13) (1, 14)
+ ASYNC 'async' (1, 14) (1, 19)
+ OP ')' (1, 19) (1, 20)
+ OP ':' (1, 20) (1, 21)
+ AWAIT 'await' (1, 22) (1, 27)
++ """)
++
++ self.check_tokenize('''\
++def f():
+
- >>> dump_tokens('''def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ def baz(): pass
++ async def bar(): pass
++
++ await = 2''', """\
+ NAME 'def' (1, 0) (1, 3)
+ NAME 'f' (1, 4) (1, 5)
+ OP '(' (1, 5) (1, 6)
+ OP ')' (1, 6) (1, 7)
+ OP ':' (1, 7) (1, 8)
+ NEWLINE '\\n' (1, 8) (1, 9)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ NAME 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
++ """)
++
++ self.check_tokenize('''\
++async def f():
++
++ def baz(): pass
++ async def bar(): pass
+
- >>> dump_tokens('''async def f():
- ...
- ... def baz(): pass
- ... async def bar(): pass
- ...
- ... await = 2''')
- ENCODING 'utf-8' (0, 0) (0, 0)
++ await = 2''', """\
+ ASYNC 'async' (1, 0) (1, 5)
+ NAME 'def' (1, 6) (1, 9)
+ NAME 'f' (1, 10) (1, 11)
+ OP '(' (1, 11) (1, 12)
+ OP ')' (1, 12) (1, 13)
+ OP ':' (1, 13) (1, 14)
+ NEWLINE '\\n' (1, 14) (1, 15)
+ NL '\\n' (2, 0) (2, 1)
+ INDENT ' ' (3, 0) (3, 2)
+ NAME 'def' (3, 2) (3, 5)
+ NAME 'baz' (3, 6) (3, 9)
+ OP '(' (3, 9) (3, 10)
+ OP ')' (3, 10) (3, 11)
+ OP ':' (3, 11) (3, 12)
+ NAME 'pass' (3, 13) (3, 17)
+ NEWLINE '\\n' (3, 17) (3, 18)
+ ASYNC 'async' (4, 2) (4, 7)
+ NAME 'def' (4, 8) (4, 11)
+ NAME 'bar' (4, 12) (4, 15)
+ OP '(' (4, 15) (4, 16)
+ OP ')' (4, 16) (4, 17)
+ OP ':' (4, 17) (4, 18)
+ NAME 'pass' (4, 19) (4, 23)
+ NEWLINE '\\n' (4, 23) (4, 24)
+ NL '\\n' (5, 0) (5, 1)
+ AWAIT 'await' (6, 2) (6, 7)
+ OP '=' (6, 8) (6, 9)
+ NUMBER '2' (6, 10) (6, 11)
+ DEDENT '' (7, 0) (7, 0)
- """
-
- from test import support
- from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
- STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
- open as tokenize_open, Untokenizer)
- from io import BytesIO
- from unittest import TestCase, mock
- import os
- import token
++ """)
+
- def dump_tokens(s):
- """Print out the tokens in s in a table format.
- The ENDMARKER is omitted.
- """
- f = BytesIO(s.encode('utf-8'))
- for type, token, start, end, line in tokenize(f.readline):
- if type == ENDMARKER:
- break
- type = tok_name[type]
- print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())
-
- def roundtrip(f):
- """
- Test roundtrip for `untokenize`. `f` is an open file or a string.
- The source code in f is tokenized to both 5- and 2-tuples.
- Both sequences are converted back to source code via
- tokenize.untokenize(), and the latter tokenized again to 2-tuples.
- The test fails if the 3 pair tokenizations do not match.
-
- When untokenize bugs are fixed, untokenize with 5-tuples should
- reproduce code that does not contain a backslash continuation
- following spaces. A proper test should test this.
-
- This function would be more useful for correcting bugs if it reported
- the first point of failure, like assertEqual, rather than just
- returning False -- or if it were only used in unittests and not
- doctest and actually used assertEqual.
- """
- # Get source code and original tokenizations
- if isinstance(f, str):
- code = f.encode('utf-8')
- else:
- code = f.read()
- f.close()
- readline = iter(code.splitlines(keepends=True)).__next__
- tokens5 = list(tokenize(readline))
- tokens2 = [tok[:2] for tok in tokens5]
- # Reproduce tokens2 from pairs
- bytes_from2 = untokenize(tokens2)
- readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
- tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
- # Reproduce tokens2 from 5-tuples
- bytes_from5 = untokenize(tokens5)
- readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
- tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
- # Compare 3 versions
- return tokens2 == tokens2_from2 == tokens2_from5
-
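
What roundtrip() exercises is the documented guarantee of untokenize(): given (type, string) 2-tuples, the regenerated source tokenizes back to the same 2-tuples, even though column positions and spacing are synthesized rather than recovered. A condensed version of the same check:

    from io import BytesIO
    from tokenize import tokenize, untokenize

    code = b"x  =  1 + 2\n"          # deliberately irregular spacing
    tokens5 = list(tokenize(BytesIO(code).readline))
    tokens2 = [tok[:2] for tok in tokens5]

    rebuilt = untokenize(tokens2)    # bytes, because an ENCODING token leads
    reread = [tok[:2] for tok in tokenize(BytesIO(rebuilt).readline)]

    # The 2-tuple round trip preserves the token stream, not the whitespace.
    assert reread == tokens2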
- # This is an example from the docs, set up as a doctest.
def decistmt(s):
- """Substitute Decimals for floats in a string of statements.
-
- >>> from decimal import Decimal
- >>> s = 'print(+21.3e-5*-.1234/81.7)'
- >>> decistmt(s)
- "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"
-
- The format of the exponent is inherited from the platform C library.
- Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
- we're only showing 11 digits, and the 12th isn't close to 5, the
- rest of the output should be platform-independent.
-
- >>> exec(s) #doctest: +ELLIPSIS
- -3.2171603427...e-0...7
-
- Output from calculations with Decimal should be identical across all
- platforms.
-
- >>> exec(decistmt(s))
- -3.217160342717258261933904529E-7
- """
result = []
g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
for toknum, tokval, _, _, _ in g: