import sys
import os
-import re
+import tokenize
__all__ = ["getline", "clearcache", "checkcache"]
pass
else:
# No luck
-## print '*** Cannot stat', filename, ':', msg
return []
-## print("Refreshing cache for %s..." % fullname)
- try:
- fp = open(fullname, 'rU')
+ with open(fullname, 'rb') as fp:
+ coding, line = tokenize.detect_encoding(fp.readline)
+ with open(fullname, 'r', encoding=coding) as fp:
lines = fp.readlines()
- fp.close()
- except Exception as msg:
-## print '*** Cannot open', fullname, ':', msg
- return []
- coding = "utf-8"
- for line in lines[:2]:
- m = re.search(r"coding[:=]\s*([-\w.]+)", line)
- if m:
- coding = m.group(1)
- break
- try:
- lines = [line if isinstance(line, str) else str(line, coding)
- for line in lines]
- except:
- pass # Hope for the best
size, mtime = stat.st_size, stat.st_mtime
cache[filename] = size, mtime, lines, fullname
return lines
import re, string, sys
from token import *
from codecs import lookup, BOM_UTF8
-from itertools import chain, repeat
# Matches an encoding declaration such as "coding: utf-8" or "coding=latin-1"
# anywhere in a line; group 1 captures the encoding name.
# A raw string is used so that \s and \w reach the regex engine verbatim
# instead of being treated as (invalid) string-literal escape sequences.
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
import token
which tells you which encoding was used to decode the bytes stream.
"""
encoding, consumed = detect_encoding(readline)
- def readline_generator():
+ def readline_generator(consumed):
+ for line in consumed:
+ yield line
while True:
try:
yield readline()
except StopIteration:
return
- chained = chain(consumed, readline_generator())
+ chained = readline_generator(consumed)
return _tokenize(chained.__next__, encoding)