reuse tokenize.detect_encoding in linecache instead of a custom solution

author Benjamin Peterson <benjamin@python.org>

Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)

committer Benjamin Peterson <benjamin@python.org>

Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)
author Benjamin Peterson <benjamin@python.org>
Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)
committer Benjamin Peterson <benjamin@python.org>
Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)
diff --git a/Lib/linecache.py b/Lib/linecache.py

index 6a9535ed1f4336b7d1983e5cb4ee7f16c5300f66..51404e2698d3cb78f834759ce9fed7c6e859ca97 100644 (file)
--- a/Lib/linecache.py
+++ b/Lib/linecache.py
@@ -7,7 +7,7 @@ that name.
  
  import sys
  import os
-import re
+import tokenize
  
  __all__ = ["getline", "clearcache", "checkcache"]
  
@@ -120,27 +120,11 @@ def updatecache(filename, module_globals=None):
                      pass
          else:
              # No luck
-##          print '*** Cannot stat', filename, ':', msg
              return []
-##  print("Refreshing cache for %s..." % fullname)
-    try:
-        fp = open(fullname, 'rU')
+    with open(fullname, 'rb') as fp:
+        coding, line = tokenize.detect_encoding(fp.readline)
+    with open(fullname, 'r', encoding=coding) as fp:
          lines = fp.readlines()
-        fp.close()
-    except Exception as msg:
-##      print '*** Cannot open', fullname, ':', msg
-        return []
-    coding = "utf-8"
-    for line in lines[:2]:
-        m = re.search(r"coding[:=]\s*([-\w.]+)", line)
-        if m:
-            coding = m.group(1)
-            break
-    try:
-        lines = [line if isinstance(line, str) else str(line, coding)
-                 for line in lines]
-    except:
-        pass  # Hope for the best
      size, mtime = stat.st_size, stat.st_mtime
      cache[filename] = size, mtime, lines, fullname
      return lines
diff --git a/Lib/tokenize.py b/Lib/tokenize.py

index 16c4f3f029830e24020f251762912d62b8afd61d..4ff859d9d0dbdb10200e421a15d88c9d9a9426f8 100644 (file)
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -27,7 +27,6 @@ __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
  import re, string, sys
  from token import *
  from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
  cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
  
  import token
@@ -327,13 +326,15 @@ def tokenize(readline):
      which tells you which encoding was used to decode the bytes stream.
      """
      encoding, consumed = detect_encoding(readline)
-    def readline_generator():
+    def readline_generator(consumed):
+        for line in consumed:
+            yield line
          while True:
              try:
                  yield readline()
              except StopIteration:
                  return
-    chained = chain(consumed, readline_generator())
+    chained = readline_generator(consumed)
      return _tokenize(chained.__next__, encoding)
author	Benjamin Peterson <benjamin@python.org>
	Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)
committer	Benjamin Peterson <benjamin@python.org>
	Tue, 24 Mar 2009 22:30:15 +0000 (22:30 +0000)
Lib/linecache.py		patch | blob | history
Lib/tokenize.py		patch | blob | history