granicus.if.org Git - python/commitdiff
Bytes literal.
author Thomas Wouters <thomas@python.org>
Fri, 23 Feb 2007 19:56:57 +0000 (19:56 +0000)
committer Thomas Wouters <thomas@python.org>
Fri, 23 Feb 2007 19:56:57 +0000 (19:56 +0000)
15 files changed:
Include/Python-ast.h
Include/opcode.h
Lib/compiler/ast.py
Lib/compiler/pyassem.py
Lib/compiler/pycodegen.py
Lib/compiler/transformer.py
Lib/opcode.py
Lib/test/test_bytes.py
Lib/test/test_compiler.py
Parser/Python.asdl
Parser/tokenizer.c
Python/Python-ast.c
Python/ast.c
Python/ceval.c
Python/compile.c

index c2fabfb2c3ef06ef4bd9b2f0bc082f2022d9c8f5..66d7b52580ea6e569dee9f5b4d8729b799c34d8e 100644 (file)
@@ -176,9 +176,9 @@ struct _stmt {
 enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4,
                   IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8,
                   GeneratorExp_kind=9, Yield_kind=10, Compare_kind=11,
-                  Call_kind=12, Num_kind=13, Str_kind=14, Ellipsis_kind=15,
-                  Attribute_kind=16, Subscript_kind=17, Name_kind=18,
-                  List_kind=19, Tuple_kind=20};
+                  Call_kind=12, Num_kind=13, Str_kind=14, Bytes_kind=15,
+                  Ellipsis_kind=16, Attribute_kind=17, Subscript_kind=18,
+                  Name_kind=19, List_kind=20, Tuple_kind=21};
 struct _expr {
         enum _expr_kind kind;
         union {
@@ -254,6 +254,10 @@ struct _expr {
                         string s;
                 } Str;
                 
+                struct {
+                        string s;
+                } Bytes;
+                
                 struct {
                         expr_ty value;
                         identifier attr;
@@ -465,6 +469,8 @@ expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty
 expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena);
 #define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3)
 expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena);
+#define Bytes(a0, a1, a2, a3) _Py_Bytes(a0, a1, a2, a3)
+expr_ty _Py_Bytes(string s, int lineno, int col_offset, PyArena *arena);
 #define Ellipsis(a0, a1, a2) _Py_Ellipsis(a0, a1, a2)
 expr_ty _Py_Ellipsis(int lineno, int col_offset, PyArena *arena);
 #define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5)
index 007816d8b318fda4ac56904f198a55f304043900..316ba4f715bacd5b4b10cea500c7f4dde8fc9927 100644 (file)
@@ -72,7 +72,7 @@ extern "C" {
 #define LOAD_LOCALS    82
 #define RETURN_VALUE   83
 #define IMPORT_STAR    84
-
+#define MAKE_BYTES     85
 #define YIELD_VALUE    86
 #define POP_BLOCK      87
 #define END_FINALLY    88
index bc283c08bd3d165039c15774da80445c8d8f7f06..4794d66da6b282f67c1fc1b0b93ac008898dbba8 100644 (file)
@@ -267,6 +267,20 @@ class Break(Node):
     def __repr__(self):
         return "Break()"
 
+class Bytes(Node):  # compiler-package AST node that carries the value of a bytes literal
+    def __init__(self, value, lineno=None):  # value: the literal's payload; lineno: source line, None if unknown
+        self.value = value
+        self.lineno = lineno
+
+    def getChildren(self):  # the payload, wrapped in a tuple
+        return self.value,  # trailing comma makes this a 1-tuple
+
+    def getChildNodes(self):  # the payload is not reported as a child node
+        return ()
+
+    def __repr__(self):
+        return "Bytes(%s)" % (repr(self.value),)  # e.g. Bytes('foo')
+
 class CallFunc(Node):
     def __init__(self, node, args, star_args = None, dstar_args = None, lineno=None):
         self.node = node
index cac899d239676c63c04d979d32b1d28873603304..f665c543b03ccbfb7ac94db6e035fdea42830e3c 100644 (file)
@@ -792,6 +792,7 @@ class StackDepthTracker:
         'DELETE_ATTR': -1,
         'STORE_GLOBAL': -1,
         'BUILD_MAP': 1,
+        'MAKE_BYTES': 0,
         'COMPARE_OP': -1,
         'STORE_FAST': -1,
         'IMPORT_STAR': -1,
index 8db4e0de725c33f49db23584f6acedc6f18f2b76..83fbc173ca649f3335933e74365c6f3d539f0eb6 100644 (file)
@@ -930,6 +930,10 @@ class CodeGenerator:
 
     def visitConst(self, node):
         self.emit('LOAD_CONST', node.value)
+    
+    def visitBytes(self, node):  # code generation for a Bytes node: two instructions
+        self.emit('LOAD_CONST', node.value)  # push the literal's value as an ordinary constant
+        self.emit('MAKE_BYTES')  # argument-less opcode added by this commit; the interpreter replaces the string on top of the stack with a bytes object
 
     def visitKeyword(self, node):
         self.emit('LOAD_CONST', node.name)
index 5f2face4abd432ee2822bc46d651c375231a5d5a..79b702ce30e5add268d01a7719765f847948862d 100644 (file)
@@ -745,9 +745,11 @@ class Transformer:
             return eval(lit)
 
     def atom_string(self, nodelist):
-        k = ''
-        for node in nodelist:
+        k = self.decode_literal(nodelist[0][1])
+        for node in nodelist[1:]:
             k += self.decode_literal(node[1])
+        if isinstance(k, bytes):
+          return Bytes(str(k), lineno=nodelist[0][2])
         return Const(k, lineno=nodelist[0][2])
 
     def atom_ellipsis(self, nodelist):
index 1e15582bfd6caf12ef6a46807cf480d87effd52a..69982f2b50344b93cbec325b07fa1e54e059b86f 100644 (file)
@@ -111,6 +111,7 @@ def_op('WITH_CLEANUP', 81)
 def_op('LOAD_LOCALS', 82)
 def_op('RETURN_VALUE', 83)
 def_op('IMPORT_STAR', 84)
+def_op('MAKE_BYTES', 85)
 def_op('YIELD_VALUE', 86)
 def_op('POP_BLOCK', 87)
 def_op('END_FINALLY', 88)
index 997122b41fe8444ce8053d3128699cbc39906401..4dee01b7f20e837b1435ce9a6cac684542190d79 100644 (file)
@@ -403,7 +403,19 @@ class BytesTest(unittest.TestCase):
             self.assertEqual(bytes.join(tuple(lst)), bytes("abc"))
             self.assertEqual(bytes.join(iter(lst)), bytes("abc"))
         # XXX more...
-            
+
+    def test_literal(self):  # bytes literals: plain and raw forms, escapes, and rejection of non-ASCII source characters
+        tests =  [  # (bytes literal, text whose latin-1 encoding must equal it)
+            (b"Wonderful spam", u"Wonderful spam"),
+            (br"Wonderful spam too", u"Wonderful spam too"),
+            (b"\xaa\x00\000\200", u"\xaa\x00\000\200"),  # hex and octal escapes are interpreted
+            (br"\xaa\x00\000\200", ur"\xaa\x00\000\200"),  # raw form: backslashes stay literal
+        ]
+        for b, s in tests:
+            self.assertEqual(b, bytes(s, 'latin-1'))
+        for c in range(128, 256):  # each non-ASCII character placed directly inside a b"..." literal
+            self.assertRaises(SyntaxError, eval,
+                              'b"%s"' % chr(c))
 
     # Optimizations:
     # __iter__? (optimization)
index ab9a66045aff926048fada4f77c91be7d3999772..bbd75119006e5838baf51578e14d30f5ac31096f 100644 (file)
@@ -187,6 +187,30 @@ class CompilerTest(unittest.TestCase):
         exec(c, dct)
         self.assertEquals(dct.get('result'), 1)
 
+    def testBytesLiteral(self):
+        c = compiler.compile("b'foo'", '<string>', 'eval')
+        b = eval(c)
+        
+        c = compiler.compile('def f(b=b"foo"):\n'
+                             '    b[0] += 1\n'
+                             '    return b\n'
+                             'f(); f(); result = f()\n',
+                             '<string>',
+                             'exec')
+        dct = {}
+        exec(c, dct)
+        self.assertEquals(dct.get('result'), b"ioo")
+        
+        c = compiler.compile('def f():\n'
+                             '    b = b"foo"\n'
+                             '    b[0] += 1\n'
+                             '    return b\n'
+                             'f(); f(); result = f()\n',
+                             '<string>',
+                             'exec')
+        dct = {}
+        exec(c, dct)
+        self.assertEquals(dct.get('result'), b"goo")
 
 NOLINENO = (compiler.ast.Module, compiler.ast.Stmt, compiler.ast.Discard)
 
index ea11349bc969e86a216b1897c797d45258f4e9ca..fd47aa0bc5199e8ba5a288609997d586b320a151 100644 (file)
@@ -60,6 +60,7 @@ module Python version "$Revision$"
                         expr? starargs, expr? kwargs)
             | Num(object n) -- a number as a PyObject.
             | Str(string s) -- need to specify raw, unicode, etc?
+            | Bytes(string s)
             | Ellipsis
             -- other literals? bools?
 
index 84b7232cf972cdd4502554a112588a07b4371ab6..84bd60eace1b035e6b3fdfa3ceae90ae586ef4a0 100644 (file)
@@ -1244,6 +1244,14 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
                        if (c == '"' || c == '\'')
                                goto letter_quote;
                        break;
+               case 'b':
+               case 'B':
+                       c = tok_nextc(tok);
+                       if (c == 'r' || c == 'R')
+                               c = tok_nextc(tok);
+                       if (c == '"' || c == '\'')
+                               goto letter_quote;
+                       break;
                }
                while (isalnum(c) || c == '_') {
                        c = tok_nextc(tok);
index ae3a39604875e9ce043f4361b229e02f5821eb56..390ba157e831865c59ff8793561bd7a64193d898 100644 (file)
@@ -2,7 +2,7 @@
 
 
 /*
-   __version__ 53731.
+   __version__ 53866.
 
    This module must be committed separately after each AST grammar change;
    The __version__ number is set to the revision number of the commit
@@ -216,6 +216,10 @@ static PyTypeObject *Str_type;
 static char *Str_fields[]={
         "s",
 };
+static PyTypeObject *Bytes_type;
+static char *Bytes_fields[]={
+        "s",
+};
 static PyTypeObject *Ellipsis_type;
 static PyTypeObject *Attribute_type;
 static char *Attribute_fields[]={
@@ -547,6 +551,8 @@ static int init_types(void)
         if (!Num_type) return 0;
         Str_type = make_type("Str", expr_type, Str_fields, 1);
         if (!Str_type) return 0;
+        Bytes_type = make_type("Bytes", expr_type, Bytes_fields, 1);
+        if (!Bytes_type) return 0;
         Ellipsis_type = make_type("Ellipsis", expr_type, NULL, 0);
         if (!Ellipsis_type) return 0;
         Attribute_type = make_type("Attribute", expr_type, Attribute_fields, 3);
@@ -1586,6 +1592,27 @@ Str(string s, int lineno, int col_offset, PyArena *arena)
         return p;
 }
 
+expr_ty  /* Build a Bytes expression node; returns NULL with an exception set on failure. */
+Bytes(string s, int lineno, int col_offset, PyArena *arena)
+{
+        expr_ty p;
+        if (!s) {  /* the s field is mandatory */
+                PyErr_SetString(PyExc_ValueError,
+                                "field s is required for Bytes");
+                return NULL;
+        }
+        p = (expr_ty)PyArena_Malloc(arena, sizeof(*p));  /* node storage comes from the caller's arena */
+        if (!p) {
+                PyErr_NoMemory();
+                return NULL;
+        }
+        p->kind = Bytes_kind;  /* selects the v.Bytes member of the union */
+        p->v.Bytes.s = s;  /* pointer is stored as given; no reference count change here */
+        p->lineno = lineno;
+        p->col_offset = col_offset;
+        return p;
+}
+
 expr_ty
 Ellipsis(int lineno, int col_offset, PyArena *arena)
 {
@@ -2550,6 +2577,15 @@ ast2obj_expr(void* _o)
                         goto failed;
                 Py_DECREF(value);
                 break;
+        case Bytes_kind:
+                result = PyType_GenericNew(Bytes_type, NULL, NULL);
+                if (!result) goto failed;
+                value = ast2obj_string(o->v.Bytes.s);
+                if (!value) goto failed;
+                if (PyObject_SetAttrString(result, "s", value) == -1)
+                        goto failed;
+                Py_DECREF(value);
+                break;
         case Ellipsis_kind:
                 result = PyType_GenericNew(Ellipsis_type, NULL, NULL);
                 if (!result) goto failed;
@@ -3089,7 +3125,7 @@ init_ast(void)
         if (PyDict_SetItemString(d, "AST", (PyObject*)AST_type) < 0) return;
         if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)
                 return;
-        if (PyModule_AddStringConstant(m, "__version__", "53731") < 0)
+        if (PyModule_AddStringConstant(m, "__version__", "53866") < 0)
                 return;
         if (PyDict_SetItemString(d, "mod", (PyObject*)mod_type) < 0) return;
         if (PyDict_SetItemString(d, "Module", (PyObject*)Module_type) < 0)
@@ -3155,6 +3191,7 @@ init_ast(void)
         if (PyDict_SetItemString(d, "Call", (PyObject*)Call_type) < 0) return;
         if (PyDict_SetItemString(d, "Num", (PyObject*)Num_type) < 0) return;
         if (PyDict_SetItemString(d, "Str", (PyObject*)Str_type) < 0) return;
+        if (PyDict_SetItemString(d, "Bytes", (PyObject*)Bytes_type) < 0) return;
         if (PyDict_SetItemString(d, "Ellipsis", (PyObject*)Ellipsis_type) < 0)
             return;
         if (PyDict_SetItemString(d, "Attribute", (PyObject*)Attribute_type) <
index a7d5713169d2a10daa9f61a76a559078c8c9d06e..9d5caf87e4dc10b5172e79b4785c7339df814d65 100644 (file)
@@ -33,8 +33,9 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
 
 static PyObject *parsenumber(const char *);
-static PyObject *parsestr(const char *s, const char *encoding);
-static PyObject *parsestrplus(struct compiling *, const node *n);
+static PyObject *parsestr(const node *n, const char *encoding, int *bytesmode);
+static PyObject *parsestrplus(struct compiling *, const node *n,
+                              int *bytesmode);
 
 #ifndef LINENO
 #define LINENO(n)       ((n)->n_lineno)
@@ -1383,6 +1384,7 @@ ast_for_atom(struct compiling *c, const node *n)
        | '{' [dictsetmaker] '}' | NAME | NUMBER | STRING+
     */
     node *ch = CHILD(n, 0);
+    int bytesmode = 0;
     
     switch (TYPE(ch)) {
     case NAME:
@@ -1390,12 +1392,15 @@ ast_for_atom(struct compiling *c, const node *n)
            changed. */
         return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
     case STRING: {
-        PyObject *str = parsestrplus(c, n);
+        PyObject *str = parsestrplus(c, n, &bytesmode);
         if (!str)
             return NULL;
 
         PyArena_AddPyObject(c->c_arena, str);
-        return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
+        if (bytesmode)
+            return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
+        else
+            return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
     }
     case NUMBER: {
         PyObject *pynum = parsenumber(STR(ch));
@@ -3254,9 +3259,10 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
  * parsestr parses it, and returns the decoded Python string object.
  */
 static PyObject *
-parsestr(const char *s, const char *encoding)
+parsestr(const node *n, const char *encoding, int *bytesmode)
 {
         size_t len;
+        const char *s = STR(n);
         int quote = Py_CHARMASK(*s);
         int rawmode = 0;
         int need_encoding;
@@ -3267,6 +3273,10 @@ parsestr(const char *s, const char *encoding)
                         quote = *++s;
                         unicode = 1;
                 }
+                if (quote == 'b' || quote == 'B') {
+                        quote = *++s;
+                        *bytesmode = 1;
+                }             
                 if (quote == 'r' || quote == 'R') {
                         quote = *++s;
                         rawmode = 1;
@@ -3276,6 +3286,10 @@ parsestr(const char *s, const char *encoding)
                 PyErr_BadInternalCall();
                 return NULL;
         }
+        if (unicode && *bytesmode) {
+                ast_error(n, "string cannot be both bytes and unicode");
+                return NULL;
+        }
         s++;
         len = strlen(s);
         if (len > INT_MAX) {
@@ -3300,7 +3314,18 @@ parsestr(const char *s, const char *encoding)
                 return decode_unicode(s, len, rawmode, encoding);
         }
 #endif
-        need_encoding = (encoding != NULL &&
+        if (*bytesmode) {
+                /* Disallow non-ascii characters (but not escapes) */
+                const char *c;
+                for (c = s; *c; c++) {
+                        if (Py_CHARMASK(*c) >= 0x80) {
+                                ast_error(n, "bytes can only contain ASCII "
+                                          "literal characters.");
+                                return NULL;
+                        }
+                }
+        }
+        need_encoding = (!*bytesmode && encoding != NULL &&
                          strcmp(encoding, "utf-8") != 0 &&
                          strcmp(encoding, "iso-8859-1") != 0);
         if (rawmode || strchr(s, '\\') == NULL) {
@@ -3332,18 +3357,25 @@ parsestr(const char *s, const char *encoding)
  * pasting the intermediate results together.
  */
 static PyObject *
-parsestrplus(struct compiling *c, const node *n)
+parsestrplus(struct compiling *c, const node *n, int *bytesmode)
 {
         PyObject *v;
         int i;
         REQ(CHILD(n, 0), STRING);
-        if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
+        v = parsestr(CHILD(n, 0), c->c_encoding, bytesmode);
+        if (v != NULL) {
                 /* String literal concatenation */
                 for (i = 1; i < NCH(n); i++) {
                         PyObject *s;
-                        s = parsestr(STR(CHILD(n, i)), c->c_encoding);
+                        int subbm = 0;
+                        s = parsestr(CHILD(n, i), c->c_encoding, &subbm);
                         if (s == NULL)
                                 goto onError;
+                        if (*bytesmode != subbm) {
+                                /* Every piece of a concatenated literal must
+                                   agree with the first on being bytes.  s is
+                                   scoped to this loop body, so the onError
+                                   path cannot release it: drop it here. */
+                                ast_error(n, "cannot mix bytes and nonbytes "
+                                          "literals");
+                                Py_DECREF(s);
+                                goto onError;
+                        }
                         if (PyString_Check(v) && PyString_Check(s)) {
                                 PyString_ConcatAndDel(&v, s);
                                 if (v == NULL)
index 0194687e22e4f94a4337d45348c60df22830af4e..5ceb743a80d1c617d220923358e5de1f4b1ec0a8 100644 (file)
@@ -1885,6 +1885,19 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
                        PUSH(x);
                        if (x != NULL) continue;
                        break;
+               
+               case MAKE_BYTES:
+                       w = POP();
+                       if (PyString_Check(w))
+                               x = PyBytes_FromStringAndSize(
+                                       PyString_AS_STRING(w),
+                                       PyString_GET_SIZE(w));
+                       else
+                               x = NULL;
+                       Py_DECREF(w);
+                       PUSH(x);
+                       if (x != NULL) continue;
+                       break;
 
                case LOAD_ATTR:
                        w = GETITEM(names, oparg);
index 927569a9287be098a5f87de751b7e1dc2162e96a..9655765f4c6f8362458f9ef60d2ba9266d0bf2cb 100644 (file)
@@ -789,6 +789,8 @@ opcode_stack_effect(int opcode, int oparg)
                        return 1-oparg;
                case BUILD_MAP:
                        return 1;
+               case MAKE_BYTES:
+                       return 0;
                case LOAD_ATTR:
                        return 0;
                case COMPARE_OP:
@@ -3077,6 +3079,10 @@ compiler_visit_expr(struct compiler *c, expr_ty e)
        case Str_kind:
                ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts);
                break;
+       case Bytes_kind:
+               ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts);
+               ADDOP(c, MAKE_BYTES);
+               break;
        case Ellipsis_kind:
                ADDOP_O(c, LOAD_CONST, Py_Ellipsis, consts);
                break;
@@ -3426,7 +3432,6 @@ compiler_visit_slice(struct compiler *c, slice_ty s, expr_context_ty ctx)
        return compiler_handle_subscr(c, kindname, ctx);
 }
 
-
 /* End of the compiler section, beginning of the assembler section */
 
 /* do depth-first search of basic block graph, starting with block.