granicus.if.org Git - python/commitdiff
Improved the bytecode optimizer.
authorRaymond Hettinger <python@rcn.com>
Tue, 22 Apr 2003 06:49:11 +0000 (06:49 +0000)
committerRaymond Hettinger <python@rcn.com>
Tue, 22 Apr 2003 06:49:11 +0000 (06:49 +0000)
* Can now test for basic blocks.
* Optimize inverted comparisons.
* Optimize unary_not followed by a conditional jump.
* Added a new opcode, NOP, to keep code size constant.
* Applied NOP to previous transformations where appropriate.

Note, the NOP would not be necessary if other functions were
added to re-target jump addresses and update the co_lnotab mapping.
That would yield slightly faster and cleaner bytecode at the
expense of optimizer simplicity and of keeping it decoupled
from the line-numbering structure.

Include/opcode.h
Lib/opcode.py
Misc/NEWS
Python/ceval.c
Python/compile.c

index 2f3dd04ba479182be3fc71445f6c422c3b2d1458..9f7d263e6692d05184feb78abb2a42d6d65e12fd 100644 (file)
@@ -14,6 +14,8 @@ extern "C" {
 #define DUP_TOP                4
 #define ROT_FOUR       5
 
+#define NOP            9
+
 #define UNARY_POSITIVE 10
 #define UNARY_NEGATIVE 11
 #define UNARY_NOT      12
index cfde5f803bf7c14472e6f15cf391a8c8ec2ac7b7..15f92a740e21e2cb04ce8e73bd9e63572433f65c 100644 (file)
@@ -49,6 +49,8 @@ def_op('ROT_THREE', 3)
 def_op('DUP_TOP', 4)
 def_op('ROT_FOUR', 5)
 
+def_op('NOP', 9)
+
 def_op('UNARY_POSITIVE', 10)
 def_op('UNARY_NEGATIVE', 11)
 def_op('UNARY_NOT', 12)
index 05f9ac6a4763c31558ab441f441cd62e96ba0d77..2897793eec2a8f76f689065e1f181cc6be294b28 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -294,6 +294,13 @@ Core and builtins
   value, but according to PEP 237 it really needs to be 1 now.  This
   will be backported to Python 2.2.3 a well.  (SF #660455)
 
+- Added several bytecode optimizations.  Provides speed-ups to
+  inverted in/is tests, inverted jumps, while 1 loops, and jumps to
+  unconditional jumps.
+
+- Added a new opcode, NOP, which is used in some of the bytecode
+  transformations.
+
 - int(s, base) sometimes sign-folds hex and oct constants; it only
   does this when base is 0 and s.strip() starts with a '0'.  When the
   sign is actually folded, as in int("0xffffffff", 0) on a 32-bit
index 3ea1bdc966cbf2dd7b01b67fd6c7015065f07451..7f8f65493bf87f23abdf6a37154ce2bb2916d0dd 100644 (file)
@@ -873,6 +873,9 @@ eval_frame(PyFrameObject *f)
 
                /* case STOP_CODE: this is an error! */
 
+               case NOP:
+                       goto fast_next_opcode;
+
                case LOAD_FAST:
                        x = GETLOCAL(oparg);
                        if (x != NULL) {
index 57f0edb8f9c0edeabcc2a3119647668e4054c8ca..4afd0eb7256b94a344af58f8070bd1f4da74c614 100644 (file)
@@ -328,6 +328,43 @@ intern_strings(PyObject *tuple)
 #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
+#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
+#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
+
+static unsigned int *
+markblocks(unsigned char *code, int len)
+{
+       unsigned int *blocks = PyMem_Malloc(len*sizeof(int));
+       int i,j, opcode, oldblock, newblock, blockcnt = 0;
+
+       if (blocks == NULL)
+               return NULL;
+       memset(blocks, 0, len*sizeof(int));
+       for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
+               opcode = code[i];
+               switch (opcode) {
+                       case FOR_ITER:
+                       case JUMP_FORWARD:
+                       case JUMP_IF_FALSE:
+                       case JUMP_IF_TRUE:
+                       case JUMP_ABSOLUTE:
+                       case CONTINUE_LOOP:
+                       case SETUP_LOOP:
+                       case SETUP_EXCEPT:
+                       case SETUP_FINALLY:
+                               j = GETJUMPTGT(code, i);
+                               oldblock = blocks[j];
+                               newblock = ++blockcnt;
+                               for (; j<len ; j++) {
+                                       if (blocks[j] != (unsigned)oldblock)
+                                               break;
+                                       blocks[j] = newblock;
+                               }
+                       break;
+               }
+       }
+       return blocks;
+}
 
 static PyObject *
 optimize_code(PyObject *code, PyObject* consts)
@@ -335,18 +372,24 @@ optimize_code(PyObject *code, PyObject* consts)
        int i, j, codelen;
        int tgt, tgttgt, opcode;
        unsigned char *codestr;
+       unsigned int *blocks;
 
        /* Make a modifiable copy of the code string */
        if (!PyString_Check(code))
                goto exitUnchanged;
        codelen = PyString_Size(code);
        codestr = PyMem_Malloc(codelen);
-       if (codestr == NULL) 
+       if (codestr == NULL)
                goto exitUnchanged;
        codestr = memcpy(codestr, PyString_AS_STRING(code), codelen);
+       blocks = markblocks(codestr, codelen);
+       if (blocks == NULL) {
+               PyMem_Free(codestr);
+               goto exitUnchanged;
+       }
        assert(PyTuple_Check(consts));
 
-       for (i=0 ; i<codelen-7 ; i += HAS_ARG(codestr[i]) ? 3 : 1) {
+       for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
                opcode = codestr[i];
                switch (opcode) {
 
@@ -363,8 +406,8 @@ optimize_code(PyObject *code, PyObject* consts)
                        SETARG(codestr, i, 4);
                        break;
 
-               /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2.
-                  Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1.
+               /* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2 NOP NOP.
+                  Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1 NOP.
                   Note, these opcodes occur together only in assignment
                   statements.  Accordingly, the unpack opcode is never
                   a jump target.  */
@@ -377,8 +420,8 @@ optimize_code(PyObject *code, PyObject* consts)
                                codestr[i] = ROT_TWO;
                                codestr[i+1] = JUMP_FORWARD;
                                SETARG(codestr, i+1, 2);
-                               codestr[i+4] = DUP_TOP;  /* Filler codes used as NOPs */
-                               codestr[i+5] = POP_TOP;
+                               codestr[i+4] = NOP;
+                               codestr[i+5] = NOP;
                                continue;
                        } 
                        if (GETARG(codestr, i) == 3 && \
@@ -386,11 +429,41 @@ optimize_code(PyObject *code, PyObject* consts)
                                codestr[i] = ROT_THREE;
                                codestr[i+1] = ROT_TWO;
                                codestr[i+2] = JUMP_FORWARD;
-                               SETARG(codestr, i+2, 1);        
-                               codestr[i+5] = DUP_TOP;
+                               SETARG(codestr, i+2, 1);
+                               codestr[i+5] = NOP;
                        }
                        break;
 
+               /* Simplify inverted tests.
+                  Must verify that sequence is a basic block because the jump
+                  can itself be a jump target.  Also, must verify that *both*
+                  jump alternatives go to a POP_TOP.  Otherwise, the code will
+                  expect the stack value to have been inverted.  */
+               case UNARY_NOT:
+                       if (codestr[i+1] != JUMP_IF_FALSE || \
+                           codestr[i+4] != POP_TOP || \
+                           !ISBASICBLOCK(blocks,i,5))
+                               continue;
+                       tgt = GETJUMPTGT(codestr, (i+1));
+                       if (codestr[tgt] != POP_TOP)
+                               continue;
+                       codestr[i] = NOP;
+                       codestr[i+1] = JUMP_IF_TRUE;
+                       break;
+
+               /* not a is b -->  a is not b
+                  not a in b -->  a not in b
+                  not a is not b -->  a is b
+                  not a not in b -->  a in b */
+               case COMPARE_OP:
+                       j = GETARG(codestr, i);
+                       if (codestr[i+3] != UNARY_NOT || j < 6 || \
+                           j > 9 || !ISBASICBLOCK(blocks,i,4))
+                               continue;
+                       SETARG(codestr, i, (j^1));
+                       codestr[i+3] = NOP;
+                       break;
+
                /* Replace jumps to unconditional jumps */
                case FOR_ITER:
                case JUMP_FORWARD:
@@ -402,7 +475,7 @@ optimize_code(PyObject *code, PyObject* consts)
                case SETUP_EXCEPT:
                case SETUP_FINALLY:
                        tgt = GETJUMPTGT(codestr, i);
-                       if (!UNCONDITIONAL_JUMP(codestr[tgt])) 
+                       if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                                continue;
                        tgttgt = GETJUMPTGT(codestr, tgt);
                        if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
@@ -422,6 +495,7 @@ optimize_code(PyObject *code, PyObject* consts)
        }
        code = PyString_FromStringAndSize(codestr, codelen);
        PyMem_Free(codestr);
+       PyMem_Free(blocks);
        return code;
 
 exitUnchanged: