granicus.if.org Git - python/commitdiff
Patch #590913: PEP 263 support.
author: Martin v. Löwis <martin@v.loewis.de>
Mon, 5 Aug 2002 14:55:21 +0000 (14:55 +0000)
committer: Martin v. Löwis <martin@v.loewis.de>
Mon, 5 Aug 2002 14:55:21 +0000 (14:55 +0000)
Tools/idle/IOBinding.py
Tools/idle/PyShell.py

index 925015f3ee55ed6091ce0f58ef8766e6abba81c3..0ea4524ccda76e56e2198724cb002ce2571d875c 100644 (file)
@@ -1,4 +1,8 @@
 import os
+import types
+import sys
+import codecs
+import re
 import tempfile
 import tkFileDialog
 import tkMessageBox
@@ -24,6 +28,71 @@ from IdleConf import idleconf
 #$ win <Control-p>
 #$ unix <Control-x><Control-p>
 
+try:
+    from codecs import BOM_UTF8
+except ImportError:
+    # only available since Python 2.3
+    BOM_UTF8 = '\xef\xbb\xbf'
+
+# Try setting the locale, so that we can find out
+# what encoding to use
+try:
+    import locale
+    locale.setlocale(locale.LC_CTYPE, "")
+except ImportError:
+    pass
+
+encoding = "ascii"
+if sys.platform == 'win32':
+    # On Windows, we could use "mbcs". However, to give the user
+    # a portable encoding name, we need to find the code page
+    try:
+        encoding = locale.getdefaultlocale()[1]
+        codecs.lookup(encoding)
+    except LookupError:
+        pass
+else:
+    try:
+        # Different things can fail here: the locale module may not be
+        # loaded, it may not offer nl_langinfo, or CODESET, or the
+        # resulting codeset may be unknown to Python. We ignore all
+        # these problems, falling back to ASCII
+        encoding = locale.nl_langinfo(locale.CODESET)
+        codecs.lookup(encoding)
+    except (NameError, AttributeError, LookupError):
+        # Try getdefaultlocale as well: it parses environment variables,
+        # which may give a clue. Unfortunately, getdefaultlocale has
+        # bugs that can cause ValueError.
+        try:
+            encoding = locale.getdefaultlocale()[1]
+            codecs.lookup(encoding)
+        except (ValueError, LookupError):
+            pass
+
+encoding = encoding.lower()
+
+coding_re = re.compile("coding[:=]\s*([-\w_.]+)")
+def coding_spec(str):
+
+    """Return the encoding declaration according to PEP 263.
+    Raise LookupError if the encoding is declared but unknown."""
+
+    # Only consider the first two lines
+    str = str.split("\n")[:2]
+    str = "\n".join(str)
+
+    match = coding_re.search(str)
+    if not match:
+        return None
+    name = match.group(1)
+    # Check whether the encoding is known
+    import codecs
+    try:
+        codecs.lookup(name)
+    except LookupError:
+        # The standard encoding error does not indicate the encoding
+        raise LookupError, "Unknown encoding "+name
+    return name
 
 class IOBinding:
 
@@ -37,6 +106,7 @@ class IOBinding:
         self.__id_savecopy = self.text.bind("<<save-copy-of-window-as-file>>",
                                             self.save_a_copy)
         self.__id_print = self.text.bind("<<print-window>>", self.print_window)
+        self.fileencoding = None
 
     def close(self):
         # Undo command bindings
@@ -101,6 +171,9 @@ class IOBinding:
         except IOError, msg:
             tkMessageBox.showerror("I/O Error", str(msg), master=self.text)
             return False
+
+        chars = self.decode(chars)
+
         self.text.delete("1.0", "end")
         self.set_filename(None)
         self.text.insert("1.0", chars)
@@ -110,6 +183,54 @@ class IOBinding:
         self.text.see("insert")
         return True
 
+    def decode(self, chars):
+        # Try to create a Unicode string. If that fails, let Tcl try
+        # its best
+
+        # Check presence of a UTF-8 signature first
+        if chars.startswith(BOM_UTF8):
+            try:
+                chars = chars[3:].decode("utf-8")
+            except UnicodeError:
+                # has UTF-8 signature, but fails to decode...
+                return chars
+            else:
+                # Indicates that this file originally had a BOM
+                self.fileencoding = BOM_UTF8
+                return chars
+
+        # Next look for coding specification
+        try:
+            enc = coding_spec(chars)
+        except LookupError, name:
+            tkMessageBox.showerror(
+                title="Error loading the file",
+                message="The encoding '%s' is not known to this Python "\
+                "installation. The file may not display correctly" % name,
+                master = self.text)
+            enc = None
+            
+        if enc:
+            try:
+                return unicode(chars, enc)
+            except UnicodeError:
+                pass
+
+        # If it is ASCII, we need not record anything
+        try:
+            return unicode(chars, 'ascii')
+        except UnicodeError:
+            pass
+
+        # Finally, try the locale's encoding. This is deprecated;
+        # the user should declare a non-ASCII encoding
+        try:
+            chars = unicode(chars, encoding)
+            self.fileencoding = encoding
+        except UnicodeError:
+            pass
+        return chars
+
     def maybesave(self):
         if self.get_saved():
             return "yes"
@@ -180,7 +301,7 @@ class IOBinding:
 
     def writefile(self, filename):
         self.fixlastline()
-        chars = str(self.text.get("1.0", "end-1c"))
+        chars = self.encode(self.text.get("1.0", "end-1c"))
         try:
             f = open(filename, "w")
             f.write(chars)
@@ -192,6 +313,68 @@ class IOBinding:
                                    master=self.text)
             return False
 
+    def encode(self, chars):
+        if isinstance(chars, types.StringType):
+            # This is either plain ASCII, or Tk was returning mixed-encoding
+            # text to us. Don't try to guess further.
+            return chars
+
+        # See whether there is anything non-ASCII in it.
+        # If not, no need to figure out the encoding.
+        try:
+            return chars.encode('ascii')
+        except UnicodeError:
+            pass
+
+        # If there is an encoding declared, try this first.
+        try:
+            enc = coding_spec(chars)
+            failed = None
+        except LookupError, msg:
+            failed = msg
+            enc = None
+        if enc:
+            try:
+                return chars.encode(enc)
+            except UnicodeError:
+                failed = "Invalid encoding '%s'" % enc
+
+        if failed:
+            tkMessageBox.showerror(
+                "I/O Error",
+                "%s. Saving as UTF-8" % failed,
+                master = self.text)
+
+        # If there was a UTF-8 signature, use that. This should not fail
+        if self.fileencoding == BOM_UTF8 or failed:
+            return BOM_UTF8 + chars.encode("utf-8")
+
+        # Try the original file encoding next, if any
+        if self.fileencoding:
+            try:
+                return chars.encode(self.fileencoding)
+            except UnicodeError:
+                tkMessageBox.showerror(
+                    "I/O Error",
+                    "Cannot save this as '%s' anymore. Saving as UTF-8" % self.fileencoding,
+                    master = self.text)
+                return BOM_UTF8 + chars.encode("utf-8")
+
+        # Nothing was declared, and we had not determined an encoding
+        # on loading. Recommend an encoding line.
+        try:
+            chars = chars.encode(encoding)
+            enc = encoding
+        except UnicodeError:
+            chars = BOM_UTF8 + chars.encode("utf-8")
+            enc = "utf-8"
+        tkMessageBox.showerror(
+            "I/O Error",
+            "Non-ASCII found, yet no encoding declared. Add a line like\n"
+            "# -*- coding: %s -*- \nto your file" % enc,
+            master = self.text)
+        return chars
+
     def fixlastline(self):
         c = self.text.get("end-2c")
         if c != '\n':
index cf854d3c3b389f4362e525cae133e29b14e68a01..31a89402cf02880ee0e1acecbafffb362264348e 100644 (file)
@@ -6,6 +6,7 @@ import string
 import getopt
 import re
 import warnings
+import types
 
 import linecache
 from code import InteractiveInterpreter
@@ -188,6 +189,9 @@ class ModifiedInterpreter(InteractiveInterpreter):
         self.more = 0
         self.save_warnings_filters = warnings.filters[:]
         warnings.filterwarnings(action="error", category=SyntaxWarning)
+        if isinstance(source, types.UnicodeType):
+            import IOBinding
+            source = source.encode(IOBinding.encoding)
         try:
             return InteractiveInterpreter.runsource(self, source, filename)
         finally: