Issue #4008: Fix problems with non-ASCII source files.

author Martin v. Löwis <martin@v.loewis.de>

Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)

committer Martin v. Löwis <martin@v.loewis.de>

Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)
author Martin v. Löwis <martin@v.loewis.de>
Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)
committer Martin v. Löwis <martin@v.loewis.de>
Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)
diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py

index 516cb7566ab40306ecde707af5a41b066af8cb57..71c0163aff85bd30a93c3bc773e1e291d5b6082a 100644 (file)
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -74,10 +74,11 @@ def coding_spec(data):
      Raise a LookupError if the encoding is declared but unknown.
      """
      if isinstance(data, bytes):
-        try:
-            lines = data.decode('utf-8')
-        except UnicodeDecodeError:
-            return None
+        # This encoding might be wrong. However, the coding
+        # spec must be ASCII-only, so any non-ASCII characters
+        # around here will be ignored. Decoding to Latin-1 should
+        # never fail (except for memory outage)
+        lines = data.decode('iso-8859-1')
      else:
          lines = data
      # consider only the first two lines
diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt

index c91a57b7d88d0e47b866db92bc8af863b87fc618..94ab7f74622d213a683dd304927c3cd304e27ba0 100644 (file)
--- a/Lib/idlelib/NEWS.txt
+++ b/Lib/idlelib/NEWS.txt
@@ -3,6 +3,8 @@ What's New in IDLE 3.1a1?
  
  *Release date: XX-XXX-XXXX*
  
+- Issue #4008: Fix problems with non-ASCII source files.
+
  - Issue #4323: Always encode source as UTF-8 without asking
    the user (unless a different encoding is declared); remove
    user configuration of source encoding; all according to
diff --git a/Lib/idlelib/ScriptBinding.py b/Lib/idlelib/ScriptBinding.py

index 1e98f1e50e2f4f5abd6e48c3bf31aa154bf80fda..37b7b517c510cb5c612bdb3f84ad1f49b7c0fa9f 100644 (file)
--- a/Lib/idlelib/ScriptBinding.py
+++ b/Lib/idlelib/ScriptBinding.py
@@ -24,7 +24,7 @@ import tabnanny
  import tokenize
  import tkinter.messagebox as tkMessageBox
  from idlelib.EditorWindow import EditorWindow
-from idlelib import PyShell
+from idlelib import PyShell, IOBinding
  
  from idlelib.configHandler import idleConf
  
@@ -62,7 +62,13 @@ class ScriptBinding:
              return 'break'
  
      def tabnanny(self, filename):
-        f = open(filename, 'r')
+        # XXX: tabnanny should work on binary files as well
+        with open(filename, 'r', encoding='iso-8859-1') as f:
+            two_lines = f.readline() + f.readline()
+        encoding = IOBinding.coding_spec(two_lines)
+        if not encoding:
+            encoding = 'utf-8'
+        f = open(filename, 'r', encoding=encoding)
          try:
              tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
          except tokenize.TokenError as msg:
@@ -82,14 +88,14 @@ class ScriptBinding:
          self.shell = shell = self.flist.open_shell()
          saved_stream = shell.get_warning_stream()
          shell.set_warning_stream(shell.stderr)
-        f = open(filename, 'r')
+        f = open(filename, 'rb')
          source = f.read()
          f.close()
-        if '\r' in source:
-            source = re.sub(r"\r\n", "\n", source)
-            source = re.sub(r"\r", "\n", source)
-        if source and source[-1] != '\n':
-            source = source + '\n'
+        if b'\r' in source:
+            source = source.replace(b'\r\n', b'\n')
+            source = source.replace(b'\r', b'\n')
+        if source and source[-1] != ord(b'\n'):
+            source = source + b'\n'
          editwin = self.editwin
          text = editwin.text
          text.tag_remove("ERROR", "1.0", "end")
author	Martin v. Löwis <martin@v.loewis.de>
	Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)
committer	Martin v. Löwis <martin@v.loewis.de>
	Sun, 18 Jan 2009 20:15:42 +0000 (20:15 +0000)
Lib/idlelib/IOBinding.py		patch | blob | history
Lib/idlelib/NEWS.txt		patch | blob | history
Lib/idlelib/ScriptBinding.py		patch | blob | history