From: Jan Kratochvil
Date: Mon, 4 Oct 2010 21:19:33 +0000 (+0200)
Subject: nasm: match instruction length (movd/movq) to parameters
X-Git-Tag: v0.9.5~64
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e114f699f654235e05fb1bbd605c35708890d8b8;p=libvpx

nasm: match instruction length (movd/movq) to parameters

nasm requires the instruction length (movd/movq) to match its parameters.
I find it clearer to really use 64-bit instructions when we use 64-bit
registers in the assembly.

Provide nasm compatibility. No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu. A few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
---

diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 3f0671c58..10b5274dc 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -69,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
         movq        mm2, [rsi + 16]         ;ip[8]
         movq        mm3, [rsi + 24]         ;ip[12]
-        movd        mm7, rax
+        movq        mm7, rax
         movq        mm4, mm0
         punpcklwd   mm7, mm7                ;0003000300030003h
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 150d090b6..eb9d1f8aa 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -288,7 +288,7 @@ sym(vp8_dequant_dc_idct_add_mmx):
         psrlq       mm0, 16
         movzx       rcx, word ptr arg(6)    ;Dc
         psllq       mm0, 16
-        movd        mm7, rcx
+        movq        mm7, rcx
         por         mm0, mm7
         movsxd      rax, dword ptr arg(4)   ;pitch
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 413d74d61..c0f06bbbb 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -50,7 +50,7 @@ sym(vp8_block_error_xmm):
         psrldq      xmm0, 8
         paddd       xmm0, xmm3
-        movd        rax, xmm0
+        movq        rax, xmm0
         pop         rdi
         pop         rsi
@@ -115,7 +115,7 @@ sym(vp8_block_error_mmx):
         psrlq       mm1, 32
         paddd       mm0, mm1
-        movd        rax, mm0
+        movq        rax, mm0
         pop         rdi
         pop         rsi
@@ -192,7 +192,7 @@ mberror_loop_mmx:
         psrlq       mm2, 32
         paddd       mm0, mm2
-        movd        rax, mm0
+        movq        rax, mm0
         pop         rdi
         pop         rsi
@@ -260,7 +260,7 @@ mberror_loop:
         psrldq      xmm0, 8
         paddd       xmm0, xmm1
-        movd        rax, xmm0
+        movq        rax, xmm0
         pop         rdi
         pop         rsi
@@ -317,7 +317,7 @@ mbuverror_loop_mmx:
         psrlq       mm7, 32
         paddd       mm0, mm7
-        movd        rax, mm0
+        movq        rax, mm0
         pop         rdi
         pop         rsi
@@ -374,7 +374,7 @@ mbuverror_loop:
         psrldq      xmm1, 8
         paddd       xmm1, xmm2
-        movd        rax, xmm1
+        movq        rax, xmm1
         pop         rdi
         pop         rsi
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index a867409b5..51cd94078 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -249,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
         paddd       mm0, mm5
         ; eob adjustment begins here
-        movd        rcx, mm0
+        movq        rcx, mm0
         and         rcx, 0xffff
         xor         rdx, rdx
@@ -262,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
         and         rax, rdx
         ; Substitute the sse assembly for the old mmx mixed assembly/C. The
         ; following is kept as reference
-        ; movd        rcx, mm0
+        ; movq        rcx, mm0
         ; bsr         rax, rcx
         ;
         ; mov         eob, rax
@@ -418,7 +418,7 @@ sym(vp8_fast_quantize_b_impl_sse):
         psrldq      xmm0, 4
         paddd       xmm1, xmm0
-        movd        rcx, xmm1
+        movq        rcx, xmm1
         and         rcx, 0xffff
         xor         rdx, rdx
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index ad9658bf6..19041d49f 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -100,7 +100,7 @@ x16x16sad_mmx_loop:
         psrlq       mm0, 32
         paddw       mm7, mm0
-        movd        rax, mm7
+        movq        rax, mm7
         pop         rdi
         pop         rsi
@@ -172,7 +172,7 @@ x8x16sad_mmx_loop:
         psrlq       mm0, 32
         paddw       mm7, mm0
-        movd        rax, mm7
+        movq        rax, mm7
         pop         rdi
         pop         rsi
@@ -242,7 +242,7 @@ x8x8sad_mmx_loop:
         psrlq       mm0, 32
         paddw       mm7, mm0
-        movd        rax, mm7
+        movq        rax, mm7
         pop         rdi
         pop         rsi
@@ -331,7 +331,7 @@ sym(vp8_sad4x4_mmx):
         psrlq       mm0, 32
         paddw       mm0, mm1
-        movd        rax, mm0
+        movq        rax, mm0
         pop         rdi
         pop         rsi
@@ -418,7 +418,7 @@ x16x8sad_mmx_loop:
         psrlq       mm0, 32
         paddw       mm7, mm0
-        movd        rax, mm7
+        movq        rax, mm7
         pop         rdi
         pop         rsi
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 9f34a7ac4..0f6c5d9c4 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -75,7 +75,7 @@ x16x16sad_wmt_loop:
         psrldq      xmm7, 8
         paddw       xmm0, xmm7
-        movd        rax, xmm0
+        movq        rax, xmm0
     ; begin epilog
         pop         rdi
@@ -113,7 +113,7 @@ sym(vp8_sad8x16_wmt):
 x8x16sad_wmt_loop:
-        movd        rax, mm7
+        movq        rax, mm7
         cmp         rax, arg(4)
         jg          x8x16sad_wmt_early_exit
@@ -135,7 +135,7 @@ x8x16sad_wmt_loop:
         cmp         rsi, rcx
         jne         x8x16sad_wmt_loop
-        movd        rax, mm7
+        movq        rax, mm7
 x8x16sad_wmt_early_exit:
@@ -174,7 +174,7 @@ sym(vp8_sad8x8_wmt):
 x8x8sad_wmt_loop:
-        movd        rax, mm7
+        movq        rax, mm7
         cmp         rax, arg(4)
         jg          x8x8sad_wmt_early_exit
@@ -190,7 +190,7 @@ x8x8sad_wmt_loop:
         cmp         rsi, rcx
         jne         x8x8sad_wmt_loop
-        movd        rax, mm7
+        movq        rax, mm7
 x8x8sad_wmt_early_exit:
     ; begin epilog
@@ -246,7 +246,7 @@ sym(vp8_sad4x4_wmt):
         psadbw      mm4, mm5
         paddw       mm0, mm4
-        movd        rax, mm0
+        movq        rax, mm0
     ; begin epilog
         pop         rdi
@@ -283,7 +283,7 @@ sym(vp8_sad16x8_wmt):
 x16x8sad_wmt_loop:
-        movd        rax, mm7
+        movq        rax, mm7
         cmp         rax, arg(4)
         jg          x16x8sad_wmt_early_exit
@@ -317,7 +317,7 @@ x16x8sad_wmt_loop:
         cmp         rsi, rcx
         jne         x16x8sad_wmt_loop
-        movd        rax, mm7
+        movq        rax, mm7
 x16x8sad_wmt_early_exit:
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index c2a1ae70a..b12c81562 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -530,7 +530,7 @@ sym(vp8_sad16x16_sse3):
 vp8_sad16x16_sse3_loop:
-        movd        rax, mm7
+        movq        rax, mm7
         cmp         rax, arg(4)
         jg          vp8_sad16x16_early_exit
@@ -564,7 +564,7 @@ vp8_sad16x16_sse3_loop:
         cmp         rsi, rcx
         jne         vp8_sad16x16_sse3_loop
-        movd        rax, mm7
+        movq        rax, mm7
 vp8_sad16x16_early_exit:
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 173238e24..d4ec63bd6 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -498,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx):
         psrlq       mm7, 32
         paddd       mm0, mm7
-        movd        rax, mm0
+        movq        rax, mm0
     ; begin epilog
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index f47d9ccdd..38b3f33ee 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -58,7 +58,7 @@ NEXTROW:
         movdqa      xmm3,xmm4
         psrldq      xmm4,4
         paddd       xmm4,xmm3
-        movd        rax,xmm4
+        movq        rax,xmm4
    ; begin epilog
@@ -471,7 +471,7 @@ sym(vp8_get8x8var_sse2):
         mov         rax, arg(5)             ;[Sum]
         mov         rdi, arg(4)             ;[SSE]
-        movd        rdx, xmm7
+        movq        rdx, xmm7
         movsx       rcx, dx
         mov         dword ptr [rax], ecx
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index dc9e2d92c..470c58a6d 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -36,6 +36,43 @@
 %define rsp esp
 %define rbp ebp
 %define movsxd mov
+%macro movq 2
+  %ifidn %1,eax
+    movd %1,%2
+  %elifidn %2,eax
+    movd %1,%2
+  %elifidn %1,ebx
+    movd %1,%2
+  %elifidn %2,ebx
+    movd %1,%2
+  %elifidn %1,ecx
+    movd %1,%2
+  %elifidn %2,ecx
+    movd %1,%2
+  %elifidn %1,edx
+    movd %1,%2
+  %elifidn %2,edx
+    movd %1,%2
+  %elifidn %1,esi
+    movd %1,%2
+  %elifidn %2,esi
+    movd %1,%2
+  %elifidn %1,edi
+    movd %1,%2
+  %elifidn %2,edi
+    movd %1,%2
+  %elifidn %1,esp
+    movd %1,%2
+  %elifidn %2,esp
+    movd %1,%2
+  %elifidn %1,ebp
+    movd %1,%2
+  %elifidn %2,ebp
+    movd %1,%2
+  %else
+    movq %1,%2
+  %endif
+%endmacro
 %endif
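
Note on the compatibility macro above: the patch switches the shared sources to
movq wherever a 64-bit general-purpose register is the operand, which is the
spelling nasm requires on x86_64. On 32-bit builds x86_abi_support.asm %defines
those register names to their 32-bit counterparts, so the same source line
would otherwise reach the assembler as, e.g., "movq eax, mm0"; the overriding
%macro catches any movq whose operand is a general-purpose register and emits
the 32-bit movd form instead. A minimal standalone sketch of the same idea,
trimmed to a single register pair (the file name, label, and hard-coded
ABI_IS_32BIT below are illustrative assumptions, not part of the patch):

    ; movq_compat_sketch.asm -- illustration only
    %define ABI_IS_32BIT 1

    %if ABI_IS_32BIT
    %define rax eax                 ; 64-bit name aliases the 32-bit register
    %macro movq 2                   ; same shape as the patch, eax case only
      %ifidn %1,eax
        movd %1,%2                  ; GPR destination: emit the movd nasm expects
      %elifidn %2,eax
        movd %1,%2                  ; GPR source: likewise
      %else
        movq %1,%2                  ; MMX/SSE/memory operands: emit a real movq
      %endif
    %endmacro
    %endif

    section .text
    get_low_dword:
        movq rax, mm0               ; expands to "movd eax, mm0" under the macro
        ret

The sketch should build as a 32-bit object with, e.g., "nasm -f elf32
movq_compat_sketch.asm"; %ifidn compares its arguments after single-line macro
expansion, so the rax operand matches the eax case.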