granicus.if.org Git - python/commitdiff
ignore the coding cookie in compile(), exec(), and eval() if the source is a string...
author Benjamin Peterson <benjamin@python.org>
Mon, 2 Mar 2009 23:31:26 +0000 (23:31 +0000)
committer Benjamin Peterson <benjamin@python.org>
Mon, 2 Mar 2009 23:31:26 +0000 (23:31 +0000)
Include/parsetok.h
Include/pythonrun.h
Lib/test/test_coding.py
Lib/test/test_pep263.py
Misc/NEWS
Parser/parsetok.c
Parser/tokenizer.c
Parser/tokenizer.h
Python/bltinmodule.c
Python/pythonrun.c

index 81f17719b15bfa9fb434e7dd4704cb6cce6b937e..fa402f862a0a80d105bb7e1edcdc0c4c3c97219f 100644 (file)
@@ -29,6 +29,8 @@ typedef struct {
 #define PyPARSE_UNICODE_LITERALS        0x0008
 #endif
 
+#define PyPARSE_IGNORE_COOKIE 0x0010  /* parser flag: build the tokenizer with PyTokenizer_FromUTF8() instead of PyTokenizer_FromString() */
+
 PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
                                               perrdetail *);
 PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int,
index e57b7f0abbf2dd4c26aa40dfe8aa6203dd59c846..c909e1a87ada0dad626bdc09493adc87b5605157 100644 (file)
@@ -12,6 +12,7 @@ extern "C" {
 #define PyCF_SOURCE_IS_UTF8  0x0100
 #define PyCF_DONT_IMPLY_DEDENT 0x0200
 #define PyCF_ONLY_AST 0x0400
+#define PyCF_IGNORE_COOKIE 0x0800  /* compiler flag: set when the source object is a str; translated to PyPARSE_IGNORE_COOKIE by PARSER_FLAGS() */
 
 typedef struct {
        int cf_flags;  /* bitmask of CO_xxx flags relevant to future */
index ade8bdfde9250473e341013118fb82c07b6f2759..51873b40de9baae31c3e25d40a11c28ecaaf141b 100644 (file)
@@ -17,10 +17,10 @@ class CodingTest(unittest.TestCase):
 
         path = os.path.dirname(__file__)
         filename = os.path.join(path, module_name + '.py')
-        fp = open(filename, encoding='utf-8')
-        text = fp.read()
+        fp = open(filename, "rb")
+        bytes = fp.read()
         fp.close()
-        self.assertRaises(SyntaxError, compile, text, filename, 'exec')
+        self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
 
     def test_exec_valid_coding(self):
         d = {}
index 72764f9935dd15c4544afae4ab11c843ee5a44ba..05ca47ff43fe189778ff587327a97b6c8f31681c 100644 (file)
@@ -30,6 +30,12 @@ class PEP263Test(unittest.TestCase):
         else:
             self.fail()
 
+    def test_issue4626(self):  # Issue #4626: a coding cookie inside str source must be ignored by compile().
+        c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")  # source is already a str; its cookie names latin-1
+        d = {}  # namespace the compiled code runs in
+        exec(c, d)
+        self.assertEquals(d['\xc6'], '\xc6')  # '\xc6' is the same character as '\u00c6': name and value must both come through unchanged
+
 def test_main():
     support.run_unittest(PEP263Test)
 
index 91c94fff6a4a374a912ba3c37eb8fee700004eb9..d737f744493934801eb65adcce384f3fddcbc17c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,9 @@ Core and Builtins
 - Issue #5249: time.strftime returned malformed string when format string
   contained non ascii character on windows.
 
+- Issue #4626: compile(), exec(), and eval() ignore the coding cookie if the
+  source has already been decoded into str.
+
 - Issue #5186: Reduce hash collisions for objects with no __hash__ method by
   rotating the object pointer by 4 bits to the right.
 
index d8ff6ee4209a03d7bc7f02ec1b69a78a89a6d3d8..4c3b5065e33f7a9fb38c2733cc75107139272885 100644 (file)
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
 
        initerr(err_ret, filename);
 
-       if ((tok = PyTokenizer_FromString(s)) == NULL) {
+       if (*flags & PyPARSE_IGNORE_COOKIE)
+               tok = PyTokenizer_FromUTF8(s);
+       else
+               tok = PyTokenizer_FromString(s);
+       if (tok == NULL) {
                err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
                return NULL;
        }
index 3d52bedc0e675291da79401977c2e6989c42fc74..c4f447d2eed4c0283573fe467a5c77a866a26bc9 100644 (file)
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
        return tok;
 }
 
+struct tok_state *  /* Set up a tokenizer over str with the encoding fixed to "utf-8"; returns NULL if tok_new() or the allocation fails. */
+PyTokenizer_FromUTF8(const char *str)  /* str is stored and scanned in place, not copied here */
+{
+       struct tok_state *tok = tok_new();
+       if (tok == NULL)
+               return NULL;
+       tok->decoding_state = STATE_RAW;  /* NOTE(review): presumably means "no decoding step needed" -- confirm against STATE_RAW's definition */
+       tok->read_coding_spec = 1;  /* marked as already read; presumably stops the tokenizer from looking for a coding cookie -- confirm */
+       tok->enc = NULL;
+       tok->str = str;
+       tok->encoding = (char *)PyMem_MALLOC(6);  /* 6 bytes: the five characters of "utf-8" plus the terminating NUL */
+       if (!tok->encoding) {
+               PyTokenizer_Free(tok);  /* hand the partially set up tokenizer back before reporting failure */
+               return NULL;
+       }
+       strcpy(tok->encoding, "utf-8");
+
+       /* XXX: constify members. */
+       tok->buf = tok->cur = tok->end = tok->inp = (char*)str;  /* all four pointers start at the beginning of str; the cast drops const */
+       return tok;
+}
+
 
 /* Set up tokenizer for file */
 
index df9cbc74b94569f64ca9af57fe3a6a90ddf73663..e3328f1a5865defdf5a3a233aa369c88d5a28ce0 100644 (file)
@@ -61,6 +61,7 @@ struct tok_state {
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *);
+extern struct tok_state *PyTokenizer_FromUTF8(const char *);
 extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
                                              char *, char *);
 extern void PyTokenizer_Free(struct tok_state *);
index 98056970a67847db4910d7960feda9c385f9a5cb..7a27fba42ec997c5329ed536c77c04d5c5d7d1e8 100644 (file)
@@ -494,12 +494,13 @@ PyDoc_STR(
 
 
 static char *
-source_as_string(PyObject *cmd, char *funcname, char *what)
+source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
 {
        char *str;
        Py_ssize_t size;
 
        if (PyUnicode_Check(cmd)) {
+               cf->cf_flags |= PyCF_IGNORE_COOKIE;
                cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
                if (cmd == NULL)
                        return NULL;
@@ -591,7 +592,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
                return result;
        }
 
-       str = source_as_string(cmd, "compile", "string, bytes, AST or code");
+       str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
        if (str == NULL)
                return NULL;
 
@@ -703,14 +704,14 @@ builtin_eval(PyObject *self, PyObject *args)
                return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
        }
 
-       str = source_as_string(cmd, "eval", "string, bytes or code");
+       cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+       str = source_as_string(cmd, "eval", "string, bytes or code", &cf);
        if (str == NULL)
                return NULL;
 
        while (*str == ' ' || *str == '\t')
                str++;
 
-       cf.cf_flags = PyCF_SOURCE_IS_UTF8;
        (void)PyEval_MergeCompilerFlags(&cf);
        result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
        Py_XDECREF(tmp);
@@ -779,12 +780,13 @@ builtin_exec(PyObject *self, PyObject *args)
                v = PyEval_EvalCode((PyCodeObject *) prog, globals, locals);
        }
        else {
-               char *str = source_as_string(prog, "exec",
-                                            "string, bytes or code");
+               char *str;
                PyCompilerFlags cf;
+               cf.cf_flags = PyCF_SOURCE_IS_UTF8;
+               str = source_as_string(prog, "exec",
+                                            "string, bytes or code", &cf);
                if (str == NULL)
                        return NULL;
-               cf.cf_flags = PyCF_SOURCE_IS_UTF8;
                if (PyEval_MergeCompilerFlags(&cf))
                        v = PyRun_StringFlags(str, Py_file_input, globals,
                                              locals, &cf);
index 65c6f5f2da14598055728f2d7c2f51d5547a7784..dee18b63e10a9f593e94c56067c9b3918a04d174 100644 (file)
@@ -1002,9 +1002,17 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag
 }
 
 /* compute parser flags based on compiler flags */
-#define PARSER_FLAGS(flags) \
-       ((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \
-                     PyPARSE_DONT_IMPLY_DEDENT : 0)) : 0)
+static int PARSER_FLAGS(PyCompilerFlags *flags)  /* Translate PyCF_* bits in flags->cf_flags into the matching PyPARSE_* bits; flags may be NULL. */
+{
+       int parser_flags = 0;
+       if (!flags)
+               return 0;  /* no compiler flags supplied: no parser flags */
+       if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
+               parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
+       if (flags->cf_flags & PyCF_IGNORE_COOKIE)
+               parser_flags |= PyPARSE_IGNORE_COOKIE;
+       return parser_flags;  /* only the two bits tested above are carried over; all other cf_flags bits are dropped here */
+}
 
 #if 0
 /* Keep an example of flags with future keyword support. */