From 92b0bd9665860d7b48f313d6fd72a583ecb01ddf Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Tue, 14 Feb 2012 15:07:10 -0800 Subject: [PATCH] Minor asm changes --- common/x86/deblock-a.asm | 11 +++++------ common/x86/quant-a.asm | 16 ++++++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/common/x86/deblock-a.asm b/common/x86/deblock-a.asm index e7c5dc31..b4cfa4a4 100644 --- a/common/x86/deblock-a.asm +++ b/common/x86/deblock-a.asm @@ -2218,10 +2218,9 @@ cglobal deblock_h_chroma_intra_mbaff, 4,6,8 %endmacro %macro LOAD_BYTES_XMM 1 - movu m0, [%1-4] ; FIXME could be aligned if we changed nnz's allocation + movu m2, [%1-4] ; FIXME could be aligned if we changed nnz's allocation movu m1, [%1+12] - mova m2, m0 - pslldq m0, 1 + pslldq m0, m2, 1 shufps m2, m1, q3131 ; cur nnz, all rows pslldq m1, 1 shufps m0, m1, q3131 ; left neighbors @@ -2278,7 +2277,7 @@ cglobal deblock_strength, 6,6 RET %macro DEBLOCK_STRENGTH_XMM 0 -cglobal deblock_strength, 6,6,8 +cglobal deblock_strength, 6,6,7 ; Prepare mv comparison register shl r4d, 8 add r4d, 3 - (1<<8) @@ -2308,9 +2307,9 @@ cglobal deblock_strength, 6,6,8 mova m2, [mv+4*8*2] mova m1, [mv+4*8*3] palignr m3, m2, [mv+4*8*2-16], 12 - palignr m7, m1, [mv+4*8*3-16], 12 psubw m2, m3 - psubw m1, m7 + palignr m3, m1, [mv+4*8*3-16], 12 + psubw m1, m3 packsswb m2, m1 %else movu m0, [mv-4+4*8*0] diff --git a/common/x86/quant-a.asm b/common/x86/quant-a.asm index fd8d2dba..a0a04622 100644 --- a/common/x86/quant-a.asm +++ b/common/x86/quant-a.asm @@ -1311,9 +1311,9 @@ cglobal coeff_last64, 1,4 shl r0d, 16 or r1d, r2d or r3d, r0d - shl r3, 32 - or r1, r3 - not r1 + shl r3, 32 + or r1, r3 + not r1 BSR rax, r1, 0x3f RET %endif @@ -1348,14 +1348,14 @@ cglobal coeff_level_run%1,0,7 pxor m2, m2 LAST_MASK %1, t5d, t0-(%1&1)*SIZEOF_DCTCOEF, t4d %if %1==15 - shr t5d, 1 + shr t5d, 1 %elif %1==8 - and t5d, 0xff + and t5d, 0xff %elif %1==4 - and t5d, 0xf + and t5d, 0xf %endif - xor t5d, (1<<%1)-1 - mov [t1+4], t5d + xor t5d, (1<<%1)-1 + mov [t1+4], t5d shl t5d, 32-%1 mov t4d, %1-1 LZCOUNT t3d, t5d, 0x1f -- 2.40.0