]> granicus.if.org Git - python/commitdiff
patch 680474 that fixes bug 679880: compile/eval/exec refused utf-8 bom
author: Just van Rossum <just@letterror.com>
Sun, 9 Feb 2003 20:38:48 +0000 (20:38 +0000)
committer: Just van Rossum <just@letterror.com>
Sun, 9 Feb 2003 20:38:48 +0000 (20:38 +0000)
mark. Added unit test.

Lib/test/test_builtin.py
Parser/tokenizer.c

index 92e44d5f64d14e5d0f31f148fc17fe57f6fa15ed..2e00632de206a821c7bc48f03b95aa62da209ab1 100644 (file)
@@ -190,6 +190,8 @@ class BuiltinTest(unittest.TestCase):
 
     def test_compile(self):
         compile('print 1\n', '', 'exec')
+        bom = '\xef\xbb\xbf'
+        compile(bom + 'print 1\n', '', 'exec')
         self.assertRaises(TypeError, compile)
         self.assertRaises(ValueError, compile, 'print 42\n', '<string>', 'badmode')
         self.assertRaises(ValueError, compile, 'print 42\n', '<string>', 'single', 0xff)
@@ -305,6 +307,8 @@ class BuiltinTest(unittest.TestCase):
             self.assertEqual(eval(unicode('a'), globals, locals), 1)
             self.assertEqual(eval(unicode('b'), globals, locals), 200)
             self.assertEqual(eval(unicode('c'), globals, locals), 300)
+            bom = '\xef\xbb\xbf'
+            self.assertEqual(eval(bom + 'a', globals, locals), 1)
         self.assertRaises(TypeError, eval)
         self.assertRaises(TypeError, eval, ())
 
index aaed637a2fdec2845a6b4f6089a5ae987fc7916e..4952a3c44702e26ae0e3ccb83f30d8c045e01fa3 100644 (file)
@@ -506,14 +506,14 @@ decoding_feof(struct tok_state *tok)
 /* Fetch a byte from TOK, using the string buffer. */
 
 static int buf_getc(struct tok_state *tok) {
-       return *tok->str++;
+       return Py_CHARMASK(*tok->str++);
 }
 
 /* Unfetch a byte from TOK, using the string buffer. */
 
 static void buf_ungetc(int c, struct tok_state *tok) {
        tok->str--;
-       assert(*tok->str == c); /* tok->cur may point to read-only segment */
+       assert(Py_CHARMASK(*tok->str) == c);    /* tok->cur may point to read-only segment */
 }
 
 /* Set the readline function for TOK to ENC. For the string-based