granicus.if.org Git - python/commitdiff
Bug #2301: Don't try decoding the source code into the original
authorMartin v. Löwis <martin@v.loewis.de>
Mon, 17 Mar 2008 20:43:42 +0000 (20:43 +0000)
committerMartin v. Löwis <martin@v.loewis.de>
Mon, 17 Mar 2008 20:43:42 +0000 (20:43 +0000)
encoding for syntax errors.

Lib/test/test_pep263.py
Misc/NEWS
Parser/parsetok.c
Parser/tokenizer.c

index cc126ba687c571d69b282dc368cd0fea6e938e01..92065c9fe1513529c261b46953687b0fcddbd4d8 100644 (file)
@@ -23,6 +23,13 @@ class PEP263Test(unittest.TestCase):
         exec(c, d)\r
         self.assertEqual(d['u'], '\xf3')\r
 \r
+    def test_issue2301(self):\r
+        try:\r
+            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")\r
+        except SyntaxError as v:\r
+            self.assertEquals(v.text, "print '\u5e74'")\r
+        else:\r
+            self.fail()\r
 \r
 def test_main():\r
     test_support.run_unittest(PEP263Test)\r
index 16652569c56f71180df5a82e513c43babd35c824..6c38150fee8a744929a295e25558a119eb551f37 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,12 @@ What's New in Python 3.0a4?
 
 *Release date: XX-XXX-2008*
 
+Core and Builtins
+-----------------
+
+- Bug #2301: Don't try decoding the source code into the original
+  encoding for syntax errors.
+
 Extension Modules
 -----------------
 
index 0b3314ec92a727ae32a0ccd2a2da716a69a19bf9..708c26df22f642fa0184dec5a851a53a6c484c6a 100644 (file)
@@ -213,21 +213,16 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
                        err_ret->error = E_EOF;
                err_ret->lineno = tok->lineno;
                if (tok->buf != NULL) {
-                       char *text = NULL;
                        size_t len;
                        assert(tok->cur - tok->buf < INT_MAX);
                        err_ret->offset = (int)(tok->cur - tok->buf);
                        len = tok->inp - tok->buf;
-                       text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
-                       if (text == NULL) {
-                               text = (char *) PyObject_MALLOC(len + 1);
-                               if (text != NULL) {
-                                       if (len > 0)
-                                               strncpy(text, tok->buf, len);
-                                       text[len] = '\0';
-                               }
+                       err_ret->text = (char *) PyObject_MALLOC(len + 1);
+                       if (err_ret->text != NULL) {
+                               if (len > 0)
+                                       strncpy(err_ret->text, tok->buf, len);
+                               err_ret->text[len] = '\0';
                        }
-                       err_ret->text = text;
                }
        } else if (tok->encoding != NULL) {
                node* r = PyNode_New(encoding_decl);
index 2833e532f7f38f56d4420c7356fd2055b957e9ef..0b8341a0a2742274a6b449520c692fb9288f5bb9 100644 (file)
@@ -1579,70 +1579,6 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
        return result;
 }
 
-/* This function is only called from parsetok. However, it cannot live
-   there, as it must be empty for PGEN, and we can check for PGEN only
-   in this file. */
-
-#ifdef PGEN
-char*
-PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
-{
-       return NULL;
-}
-#else
-static PyObject *
-dec_utf8(const char *enc, const char *text, size_t len) {
-       PyObject *ret = NULL;
-       PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
-       if (unicode_text) {
-               ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
-               Py_DECREF(unicode_text);
-       }
-       if (!ret) {
-               PyErr_Clear();
-       }
-        else {
-               assert(PyString_Check(ret));
-       }
-       return ret;
-}
-
-char *
-PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
-{
-       char *text = NULL;
-       if (tok->encoding) {
-               /* convert source to original encondig */
-               PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
-               if (lineobj != NULL) {
-                       int linelen = PyString_GET_SIZE(lineobj);
-                       const char *line = PyString_AS_STRING(lineobj);
-                       text = PyObject_MALLOC(linelen + 1);
-                       if (text != NULL && line != NULL) {
-                               if (linelen)
-                                       strncpy(text, line, linelen);
-                               text[linelen] = '\0';
-                       }
-                       Py_DECREF(lineobj);
-
-                       /* adjust error offset */
-                       if (*offset > 1) {
-                               PyObject *offsetobj = dec_utf8(tok->encoding,
-                                                              tok->buf,
-                                                              *offset-1);
-                               if (offsetobj) {
-                                       *offset = 1 + Py_SIZE(offsetobj);
-                                       Py_DECREF(offsetobj);
-                               }
-                       }
-
-               }
-       }
-       return text;
-
-}
-#endif
-
 /* Get -*- encoding -*- from a Python file.
 
    PyTokenizer_FindEncoding returns NULL when it can't find the encoding in