From: Henrik Gramner
Date: Sat, 31 Mar 2018 11:49:56 +0000 (+0200)
Subject: x86inc: Optimize VEX instruction encoding
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8badb910847e94abb66686009e424bdce355c9f4;p=libx264

x86inc: Optimize VEX instruction encoding

Most VEX-encoded instructions require an additional byte to encode when src2
is a high register (e.g. x|ymm8..15). If the instruction is commutative we
can swap src1 and src2 when doing so reduces the instruction length, e.g.

    vpaddw xmm0, xmm0, xmm8 -> vpaddw xmm0, xmm8, xmm0
---

diff --git a/common/x86/x86inc.asm b/common/x86/x86inc.asm
index 49e73d65..280a9955 100644
--- a/common/x86/x86inc.asm
+++ b/common/x86/x86inc.asm
@@ -1240,9 +1240,40 @@ INIT_XMM
     %elif %0 >= 9
         __instr %6, %7, %8, %9
     %elif %0 == 8
-        __instr %6, %7, %8
+        %if avx_enabled && %5
+            %xdefine __src1 %7
+            %xdefine __src2 %8
+            %ifnum regnumof%7
+                %ifnum regnumof%8
+                    %if regnumof%7 < 8 && regnumof%8 >= 8 && regnumof%8 < 16 && sizeof%8 <= 32
+                        ; Most VEX-encoded instructions require an additional byte to encode when
+                        ; src2 is a high register (e.g. m8..15). If the instruction is commutative
+                        ; we can swap src1 and src2 when doing so reduces the instruction length.
+                        %xdefine __src1 %8
+                        %xdefine __src2 %7
+                    %endif
+                %endif
+            %endif
+            __instr %6, __src1, __src2
+        %else
+            __instr %6, %7, %8
+        %endif
     %elif %0 == 7
-        __instr %6, %7
+        %if avx_enabled && %5
+            %xdefine __src1 %6
+            %xdefine __src2 %7
+            %ifnum regnumof%6
+                %ifnum regnumof%7
+                    %if regnumof%6 < 8 && regnumof%7 >= 8 && regnumof%7 < 16 && sizeof%7 <= 32
+                        %xdefine __src1 %7
+                        %xdefine __src2 %6
+                    %endif
+                %endif
+            %endif
+            __instr %6, __src1, __src2
+        %else
+            __instr %6, %7
+        %endif
     %else
         __instr %6
     %endif