From: Raymond Hettinger Date: Tue, 22 Apr 2003 06:49:11 +0000 (+0000) Subject: Improved the bytecode optimizer. X-Git-Tag: v2.3c1~1090 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=060641d51160f6bf49a049bb677f8412b5a19de3;p=python Improved the bytecode optimizer. * Can now test for basic blocks. * Optimize inverted comparisons. * Optimize unary_not followed by a conditional jump. * Added a new opcode, NOP, to keep code size constant. * Applied NOP to previous transformations where appropriate. Note, the NOP would not be necessary if other functions were added to re-target jump addresses and update the co_lnotab mapping. That would yield slightly faster and cleaner bytecode at the expense of optimizer simplicity and of keeping it decoupled from the line-numbering structure. --- diff --git a/Include/opcode.h b/Include/opcode.h index 2f3dd04ba4..9f7d263e66 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -14,6 +14,8 @@ extern "C" { #define DUP_TOP 4 #define ROT_FOUR 5 +#define NOP 9 + #define UNARY_POSITIVE 10 #define UNARY_NEGATIVE 11 #define UNARY_NOT 12 diff --git a/Lib/opcode.py b/Lib/opcode.py index cfde5f803b..15f92a740e 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -49,6 +49,8 @@ def_op('ROT_THREE', 3) def_op('DUP_TOP', 4) def_op('ROT_FOUR', 5) +def_op('NOP', 9) + def_op('UNARY_POSITIVE', 10) def_op('UNARY_NEGATIVE', 11) def_op('UNARY_NOT', 12) diff --git a/Misc/NEWS b/Misc/NEWS index 05f9ac6a47..2897793eec 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -294,6 +294,13 @@ Core and builtins value, but according to PEP 237 it really needs to be 1 now. This will be backported to Python 2.2.3 a well. (SF #660455) +- Added several bytecode optimizations. Provides speed-ups to + inverted in/is tests, inverted jumps, while 1 loops, and jumps to + unconditional jumps. + +- Added a new opcode, NOP, which is used in some of the bytecode + transformations. 
+ - int(s, base) sometimes sign-folds hex and oct constants; it only does this when base is 0 and s.strip() starts with a '0'. When the sign is actually folded, as in int("0xffffffff", 0) on a 32-bit diff --git a/Python/ceval.c b/Python/ceval.c index 3ea1bdc966..7f8f65493b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -873,6 +873,9 @@ eval_frame(PyFrameObject *f) /* case STOP_CODE: this is an error! */ + case NOP: + goto fast_next_opcode; + case LOAD_FAST: x = GETLOCAL(oparg); if (x != NULL) { diff --git a/Python/compile.c b/Python/compile.c index 57f0edb8f9..4afd0eb725 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -328,6 +328,43 @@ intern_strings(PyObject *tuple) #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 +#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) +#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1]) + +static unsigned int * +markblocks(unsigned char *code, int len) +{ + unsigned int *blocks = PyMem_Malloc(len*sizeof(int)); + int i,j, opcode, oldblock, newblock, blockcnt = 0; + + if (blocks == NULL) + return NULL; + memset(blocks, 0, len*sizeof(int)); + for (i=0 ; i a is not b + not a in b --> a not in b + not a is not b --> a is b + not a not in b --> a in b */ + case COMPARE_OP: + j = GETARG(codestr, i); + if (codestr[i+3] != UNARY_NOT || j < 6 || \ + j > 9 || !ISBASICBLOCK(blocks,i,4)) + continue; + SETARG(codestr, i, (j^1)); + codestr[i+3] = NOP; + break; + /* Replace jumps to unconditional jumps */ case FOR_ITER: case JUMP_FORWARD: @@ -402,7 +475,7 @@ optimize_code(PyObject *code, PyObject* consts) case SETUP_EXCEPT: case SETUP_FINALLY: tgt = GETJUMPTGT(codestr, i); - if (!UNCONDITIONAL_JUMP(codestr[tgt])) + if (!UNCONDITIONAL_JUMP(codestr[tgt])) continue; tgttgt = GETJUMPTGT(codestr, tgt); if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards 
*/ @@ -422,6 +495,7 @@ optimize_code(PyObject *code, PyObject* consts) } code = PyString_FromStringAndSize(codestr, codelen); PyMem_Free(codestr); + PyMem_Free(blocks); return code; exitUnchanged: