granicus.if.org Git - python/commitdiff
Correction for issue1134: all source files with a coding spec, except latin-1
author: Amaury Forgeot d'Arc <amauryfa@gmail.com>
Thu, 15 Nov 2007 23:19:43 +0000 (23:19 +0000)
committer: Amaury Forgeot d'Arc <amauryfa@gmail.com>
Thu, 15 Nov 2007 23:19:43 +0000 (23:19 +0000)
and utf-8, crashed when parsing a multiline string, or a line longer than 512
columns.

Lib/test/test_coding.py
Parser/tokenizer.c

index 4d4b3f966a07dd3300a33d109f6115f408abb651..0ff1bdf0abab6a38c147d6e54e5cd0743a02c4d7 100644 (file)
@@ -1,6 +1,6 @@
 
 import test.test_support, unittest
-import os
+import os, sys
 
 class CodingTest(unittest.TestCase):
     def test_bad_coding(self):
@@ -26,6 +26,26 @@ class CodingTest(unittest.TestCase):
         exec('# coding: cp949\na = 5\n', d)
         self.assertEqual(d['a'], 5)
 
+    def test_file_parse(self):
+        # issue1134: all encodings outside latin-1 and utf-8 fail on
+        # multiline strings and long lines (>512 columns)
+        sys.path.insert(0, ".")
+        filename = test.test_support.TESTFN+".py"
+        f = open(filename, "w")
+        try:
+            f.write("# -*- coding: cp1252 -*-\n")
+            f.write("'''A short string\n")
+            f.write("'''\n")
+            f.write("'A very long string %s'\n" % ("X" * 1000))
+            f.close()
+
+            __import__(test.test_support.TESTFN)
+        finally:
+            f.close()
+            os.remove(test.test_support.TESTFN+".py")
+            os.remove(test.test_support.TESTFN+".pyc")
+            sys.path.pop(0)
+
 def test_main():
     test.test_support.run_unittest(CodingTest)
 
index 099f6dfbb51278d567ff8a94b5fea1f3dc8eec41..710c566b34e13e2763cf1f93c8754d4ada7dbd87 100644 (file)
@@ -369,46 +369,61 @@ check_bom(int get_char(struct tok_state *),
 static char *
 fp_readl(char *s, int size, struct tok_state *tok)
 {
-       PyObject* bufobj = tok->decoding_buffer;
+       PyObject* bufobj;
        const char *buf;
        Py_ssize_t buflen;
-       int allocated = 0;
 
        /* Ask for one less byte so we can terminate it */
        assert(size > 0);
        size--;
 
-       if (bufobj == NULL) {
+       if (tok->decoding_buffer) {
+               bufobj = tok->decoding_buffer;
+               Py_INCREF(bufobj);
+       }
+       else
+       {
                bufobj = PyObject_CallObject(tok->decoding_readline, NULL);
                if (bufobj == NULL)
                        goto error;
-               allocated = 1;
        }
-       buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
-       if (buf == NULL) {
-               goto error;
+       if (PyUnicode_CheckExact(bufobj))
+       {
+               buf = PyUnicode_AsStringAndSize(bufobj, &buflen);
+               if (buf == NULL) {
+                       goto error;
+               }
        }
+       else
+       {
+               buf = PyBytes_AsString(bufobj);
+               if (buf == NULL) {
+                       goto error;
+               }
+               buflen = PyBytes_GET_SIZE(bufobj);
+       }
+
+       Py_XDECREF(tok->decoding_buffer);
        if (buflen > size) {
-               Py_XDECREF(tok->decoding_buffer);
+               /* Too many chars, the rest goes into tok->decoding_buffer */
                tok->decoding_buffer = PyBytes_FromStringAndSize(buf+size,
                                                                 buflen-size);
                if (tok->decoding_buffer == NULL)
                        goto error;
                buflen = size;
        }
+       else
+               tok->decoding_buffer = NULL;
+
        memcpy(s, buf, buflen);
        s[buflen] = '\0';
        if (buflen == 0) /* EOF */
                s = NULL;
-       if (allocated) {
-               Py_DECREF(bufobj);
-       }
+       Py_DECREF(bufobj);
        return s;
 
 error:
-       if (allocated) {
-               Py_XDECREF(bufobj);
-       }
+       Py_XDECREF(bufobj);
        return error_ret(tok);
 }