Possible fix for Skip's bug 116136 (sre recursion limit hit in tokenize.py).

author Tim Peters <tim.peters@gmail.com>

Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)

committer Tim Peters <tim.peters@gmail.com>

Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)
author Tim Peters <tim.peters@gmail.com>
Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)
committer Tim Peters <tim.peters@gmail.com>
Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)
diff --git a/Lib/tokenize.py b/Lib/tokenize.py

index 30bb557347e19a362c3f19a5d913ded59195c9dd..f2ba0a28a1151ef5117f48ba9d72ab9b0d0a7817 100644 (file)
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -46,18 +46,25 @@ Floatnumber = group(Pointfloat, Expfloat)
  Imagnumber = group(r'0[jJ]', r'[1-9]\d*[jJ]', Floatnumber + r'[jJ]')
  Number = group(Imagnumber, Floatnumber, Intnumber)
  
-Single = any(r"[^'\\]", r'\\.') + "'"
-Double = any(r'[^"\\]', r'\\.') + '"'
-Single3 = any(r"[^'\\]",r'\\.',r"'[^'\\]",r"'\\.",r"''[^'\\]",r"''\\.") + "'''"
-Double3 = any(r'[^"\\]',r'\\.',r'"[^"\\]',r'"\\.',r'""[^"\\]',r'""\\.') + '"""'
+# Tail end of ' string.
+Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
+# Tail end of " string.
+Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
+# Tail end of ''' string.
+Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
+# Tail end of """ string.
+Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
  Triple = group("[rR]?'''", '[rR]?"""')
-String = group("[rR]?'" + any(r"[^\n'\\]", r'\\.') + "'",
-               '[rR]?"' + any(r'[^\n"\\]', r'\\.') + '"')
+# Single-line ' or " string.
+String = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
  
-Operator = group('\+=', '\-=', '\*=', '%=', '/=', '\*\*=', '&=', '\|=',
-                 '\^=', '>>=', '<<=', '\+', '\-', '\*\*', '\*', '\^', '~',
-                 '/', '%', '&', '\|', '<<', '>>', '==', '<=', '<>', '!=',
-                 '>=', '=', '<', '>')
+# Regex alternation is leftmost-first, not leftmost-longest, so be sure to
+# put the longest operators first (e.g., if = came before ==, == would get
+# recognized as two instances of =).
+Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
+                 r"[+\-*/%&|^=<>]=?",
+                 r"~")
  
  Bracket = '[][(){}]'
  Special = group(r'\r?\n', r'[:;.,`]')
@@ -66,8 +73,9 @@ Funny = group(Operator, Bracket, Special)
  PlainToken = group(Number, Funny, String, Name)
  Token = Ignore + PlainToken
  
-ContStr = group("[rR]?'" + any(r'\\.', r"[^\n'\\]") + group("'", r'\\\r?\n'),
-                '[rR]?"' + any(r'\\.', r'[^\n"\\]') + group('"', r'\\\r?\n'))
+# First (or only) line of ' or " string.
+ContStr = group(r"[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" + group("'", r'\\\r?\n'),
+                r'[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' + group('"', r'\\\r?\n'))
  PseudoExtras = group(r'\\\r?\n', Comment, Triple)
  PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
author	Tim Peters <tim.peters@gmail.com>
	Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Sat, 7 Oct 2000 05:09:39 +0000 (05:09 +0000)