From 01166da85ad4387129975b42587652ea38cef7ef Mon Sep 17 00:00:00 2001
From: "Kurt B. Kaiser" <kbk@shore.net>
Date: Mon, 16 Sep 2002 22:03:37 +0000
Subject: [PATCH] Merge Py Idle changes: Rev 1.5 tim_one Convert a pile of
 obvious "yes/no" functions to return bool.

Rev 1.6 gvanrossum
(partially merged previously, move line outside try: block)

Provisional fix for writefile() [SF bug # 541730].

The problem was that an exception can occur in the text.get() call or
in the write() call, when the text buffer contains non-ASCII
characters.  This causes the previous contents of the file to be lost.

The provisional fix is to call str(self.text.get(...)) *before*
opening the file, so that if the exception occurs, we never open the
file.

Two orthogonal better solutions have to wait for policy decisions:

1. We could try to encode the data as Latin-1 or as UTF-8; but that
   would require IDLE to grow a notion of file encoding which requires
   more thought.

2. We could make backups before overwriting a file.  This requires
   more thought because it needs to be fast and cross-platform and
   configurable.

Rev 1.7 gvanrossum
(previously merged with modifications by Stephen M. Gava)

Add primitive printing support for Unix and Windows.

Rev 1.8 loewis
Patch #590913: PEP 263 support.

Rev 1.9 gvanrossum
(tempfile.py interface -- deferred)

Rev 1.10 tim_one
whitespace normalization

Rev 1.11 nnorwitz
(deferred pending 1.9 integration)
---
 Lib/idlelib/IOBinding.py | 207 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 195 insertions(+), 12 deletions(-)

diff --git a/Lib/idlelib/IOBinding.py b/Lib/idlelib/IOBinding.py
index 496bc43183..58d1913c7e 100644
--- a/Lib/idlelib/IOBinding.py
+++ b/Lib/idlelib/IOBinding.py
@@ -6,6 +6,9 @@
 #     which will only understand the local convention.
 
 import os
+import types
+import sys
+import codecs
 import tempfile
 import tkFileDialog
 import tkMessageBox
@@ -32,6 +35,71 @@ from configHandler import idleConf
 #$ win <Alt-Shift-s>
 #$ unix <Control-x><w>
 
+try:
+    from codecs import BOM_UTF8
+except ImportError:
+    # only available since Python 2.3
+    BOM_UTF8 = '\xef\xbb\xbf'
+
+# Try setting the locale, so that we can find out
+# what encoding to use
+try:
+    import locale
+    locale.setlocale(locale.LC_CTYPE, "")
+except ImportError:
+    pass
+
+encoding = "ascii"
+if sys.platform == 'win32':
+    # On Windows, we could use "mbcs". However, to give the user
+    # a portable encoding name, we need to find the code page
+    try:
+        encoding = locale.getdefaultlocale()[1]
+        codecs.lookup(encoding)
+    except LookupError:
+        pass
+else:
+    try:
+        # Different things can fail here: the locale module may not be
+        # loaded, it may not offer nl_langinfo, or CODESET, or the
+        # resulting codeset may be unknown to Python. We ignore all
+        # these problems, falling back to ASCII
+        encoding = locale.nl_langinfo(locale.CODESET)
+        codecs.lookup(encoding)
+    except (NameError, AttributeError, LookupError):
+        # Try getdefaultlocale as well: it parses environment variables,
+        # which may give a clue. Unfortunately, getdefaultlocale has
+        # bugs that can cause ValueError.
+        try:
+            encoding = locale.getdefaultlocale()[1]
+            codecs.lookup(encoding)
+        except (ValueError, LookupError):
+            pass
+
+encoding = encoding.lower()
+
+coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
+def coding_spec(str):
+
+    """Return the encoding declaration according to PEP 263.
+    Raise LookupError if the encoding is declared but unknown."""
+
+    # Only consider the first two lines
+    str = str.split("\n")[:2]
+    str = "\n".join(str)
+
+    match = coding_re.search(str)
+    if not match:
+        return None
+    name = match.group(1)
+    # Check whether the encoding is known
+    import codecs
+    try:
+        codecs.lookup(name)
+    except LookupError:
+        # The standard encoding error does not indicate the encoding
+        raise LookupError, "Unknown encoding "+name
+    return name
 
 class IOBinding:
 
@@ -44,6 +112,7 @@ class IOBinding:
                                           self.save_as)
         self.__id_savecopy = self.text.bind("<<save-copy-of-window-as-file>>",
                                             self.save_a_copy)
+        self.fileencoding = None
         self.__id_print = self.text.bind("<<print-window>>", self.print_window)
         
     def close(self):
@@ -129,8 +198,9 @@ class IOBinding:
             f.close()
         except IOError, msg:
             tkMessageBox.showerror("I/O Error", str(msg), master=self.text)
-            return 0
+            return False
 
+        chars = self.decode(chars)
         # We now convert all end-of-lines to '\n's
         eol = r"(\r\n)|\n|\r"  # \r\n (Windows), \n (UNIX), or \r (Mac)
         chars = re.compile( eol ).sub( r"\n", chars )
@@ -142,7 +212,55 @@ class IOBinding:
         self.set_filename(filename)
         self.text.mark_set("insert", "1.0")
         self.text.see("insert")
-        return 1
+        return True
+
+    def decode(self, chars):
+        # Try to create a Unicode string. If that fails, let Tcl try
+        # its best
+
+        # Check presence of a UTF-8 signature first
+        if chars.startswith(BOM_UTF8):
+            try:
+                chars = chars[3:].decode("utf-8")
+            except UnicodeError:
+                # has UTF-8 signature, but fails to decode...
+                return chars
+            else:
+                # Indicates that this file originally had a BOM
+                self.fileencoding = BOM_UTF8
+                return chars
+
+        # Next look for coding specification
+        try:
+            enc = coding_spec(chars)
+        except LookupError, name:
+            tkMessageBox.showerror(
+                title="Error loading the file",
+                message="The encoding '%s' is not known to this Python "\
+                "installation. The file may not display correctly" % name,
+                master = self.text)
+            enc = None
+
+        if enc:
+            try:
+                return unicode(chars, enc)
+            except UnicodeError:
+                pass
+
+        # If it is ASCII, we need not record anything
+        try:
+            return unicode(chars, 'ascii')
+        except UnicodeError:
+            pass
+
+        # Finally, try the locale's encoding. This is deprecated;
+        # the user should declare a non-ASCII encoding
+        try:
+            chars = unicode(chars, encoding)
+            self.fileencoding = encoding
+        except UnicodeError:
+            pass
+        return chars
 
     def maybesave(self):
         if self.get_saved():
@@ -190,18 +308,86 @@ class IOBinding:
 
     def writefile(self, filename):
         self.fixlastline()
+        chars = self.encode(self.text.get("1.0", "end-1c"))
         try:
             f = open(filename, "w")
-            chars = str(self.text.get("1.0", "end-1c"))
             f.write(chars)
             f.close()
             ## print "saved to", `filename`
-            return 1
+            return True
         except IOError, msg:
             tkMessageBox.showerror("I/O Error", str(msg),
                                    master=self.text)
-            return 0
+            return False
+
+    def encode(self, chars):
+        if isinstance(chars, types.StringType):
+            # This is either plain ASCII, or Tk was returning mixed-encoding
+            # text to us. Don't try to guess further.
+            return chars
+
+        # See whether there is anything non-ASCII in it.
+        # If not, no need to figure out the encoding.
+        try:
+            return chars.encode('ascii')
+        except UnicodeError:
+            pass
+
+        # If there is an encoding declared, try this first.
+        try:
+            enc = coding_spec(chars)
+            failed = None
+        except LookupError, msg:
+            failed = msg
+            enc = None
+        if enc:
+            try:
+                return chars.encode(enc)
+            except UnicodeError:
+                failed = "Invalid encoding '%s'" % enc
+
+        if failed:
+            tkMessageBox.showerror(
+                "I/O Error",
+                "%s. Saving as UTF-8" % failed,
+                master = self.text)
+
+        # If there was a UTF-8 signature, use that. This should not fail
+        if self.fileencoding == BOM_UTF8 or failed:
+            return BOM_UTF8 + chars.encode("utf-8")
+
+        # Try the original file encoding next, if any
+        if self.fileencoding:
+            try:
+                return chars.encode(self.fileencoding)
+            except UnicodeError:
+                tkMessageBox.showerror(
+                    "I/O Error",
+                    "Cannot save this as '%s' anymore. Saving as UTF-8" \
+                    % self.fileencoding,
+                    master = self.text)
+                return BOM_UTF8 + chars.encode("utf-8")
+
+        # Nothing was declared, and we had not determined an encoding
+        # on loading. Recommend an encoding line.
+        try:
+            chars = chars.encode(encoding)
+            enc = encoding
+        except UnicodeError:
+            chars = BOM_UTF8 + chars.encode("utf-8")
+            enc = "utf-8"
+        tkMessageBox.showerror(
+            "I/O Error",
+            "Non-ASCII found, yet no encoding declared. Add a line like\n"
+            "# -*- coding: %s -*- \nto your file" % enc,
+            master = self.text)
+        return chars
  
+    def fixlastline(self):
+        c = self.text.get("end-2c")
+        if c != '\n':
+            self.text.insert("end-1c", "\n")
+
     def print_window(self, event):
         tempfilename = None
         if self.get_saved():
@@ -214,7 +400,8 @@ class IOBinding:
         platform=os.name
         printPlatform=1
         if platform == 'posix': #posix platform
-            command = idleConf.GetOption('main','General','print-command-posix')
+            command = idleConf.GetOption('main','General',
+                                         'print-command-posix')
             command = command + " 2>&1"
         elif platform == 'nt': #win32 platform
             command = idleConf.GetOption('main','General','print-command-win')
@@ -226,7 +413,8 @@ class IOBinding:
             output = pipe.read().strip()
             status = pipe.close()
             if status:
-                output = "Printing failed (exit status 0x%x)\n" % status + output
+                output = "Printing failed (exit status 0x%x)\n" % \
+                         status + output
             if output:
                 output = "Printing command: %s\n" % repr(command) + output
                 tkMessageBox.showerror("Print status", output, master=self.text)
@@ -235,11 +423,6 @@ class IOBinding:
             tkMessageBox.showinfo("Print status", message, master=self.text)
         return "break"
     
-    def fixlastline(self):
-        c = self.text.get("end-2c")
-        if c != '\n':
-            self.text.insert("end-1c", "\n")
-
     opendialog = None
     savedialog = None
 
-- 
2.40.0