nasm requires the instruction length (movd/movq) to match its
parameters. It is also clearer to use genuine 64-bit instructions when
we use 64-bit registers in the assembly.
Provide nasm compatibility. This patch produces no binary change with
yasm on {x86_64,i686}-fedora13-linux-gnu. The few longer opcodes emitted
by nasm on {x86_64,i686}-fedora13-linux-gnu have been checked and are safe.
Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
movq mm2, [rsi + 16] ;ip[8]
movq mm3, [rsi + 24] ;ip[12]
- movd mm7, rax
+ movq mm7, rax
movq mm4, mm0
punpcklwd mm7, mm7 ;0003000300030003h
psrlq mm0, 16
movzx rcx, word ptr arg(6) ;Dc
psllq mm0, 16
- movd mm7, rcx
+ movq mm7, rcx
por mm0, mm7
movsxd rax, dword ptr arg(4) ;pitch
psrldq xmm0, 8
paddd xmm0, xmm3
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
psrlq mm1, 32
paddd mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
psrlq mm2, 32
paddd mm0, mm2
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
psrldq xmm0, 8
paddd xmm0, xmm1
- movd rax, xmm0
+ movq rax, xmm0
pop rdi
pop rsi
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
psrldq xmm1, 8
paddd xmm1, xmm2
- movd rax, xmm1
+ movq rax, xmm1
pop rdi
pop rsi
paddd mm0, mm5
; eob adjustment begins here
- movd rcx, mm0
+ movq rcx, mm0
and rcx, 0xffff
xor rdx, rdx
and rax, rdx
; Substitute the sse assembly for the old mmx mixed assembly/C. The
; following is kept as reference
- ; movd rcx, mm0
+ ; movq rcx, mm0
; bsr rax, rcx
;
; mov eob, rax
psrldq xmm0, 4
paddd xmm1, xmm0
- movd rcx, xmm1
+ movq rcx, xmm1
and rcx, 0xffff
xor rdx, rdx
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
psrlq mm0, 32
paddw mm0, mm1
- movd rax, mm0
+ movq rax, mm0
pop rdi
pop rsi
psrlq mm0, 32
paddw mm7, mm0
- movd rax, mm7
+ movq rax, mm7
pop rdi
pop rsi
psrldq xmm7, 8
paddw xmm0, xmm7
- movd rax, xmm0
+ movq rax, xmm0
; begin epilog
pop rdi
x8x16sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x16sad_wmt_early_exit
cmp rsi, rcx
jne x8x16sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x16sad_wmt_early_exit:
x8x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x8x8sad_wmt_early_exit
cmp rsi, rcx
jne x8x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x8x8sad_wmt_early_exit:
; begin epilog
psadbw mm4, mm5
paddw mm0, mm4
- movd rax, mm0
+ movq rax, mm0
; begin epilog
pop rdi
x16x8sad_wmt_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg x16x8sad_wmt_early_exit
cmp rsi, rcx
jne x16x8sad_wmt_loop
- movd rax, mm7
+ movq rax, mm7
x16x8sad_wmt_early_exit:
vp8_sad16x16_sse3_loop:
- movd rax, mm7
+ movq rax, mm7
cmp rax, arg(4)
jg vp8_sad16x16_early_exit
cmp rsi, rcx
jne vp8_sad16x16_sse3_loop
- movd rax, mm7
+ movq rax, mm7
vp8_sad16x16_early_exit:
psrlq mm7, 32
paddd mm0, mm7
- movd rax, mm0
+ movq rax, mm0
; begin epilog
movdqa xmm3,xmm4
psrldq xmm4,4
paddd xmm4,xmm3
- movd rax,xmm4
+ movq rax,xmm4
; begin epilog
mov rax, arg(5) ;[Sum]
mov rdi, arg(4) ;[SSE]
- movd rdx, xmm7
+ movq rdx, xmm7
movsx rcx, dx
mov dword ptr [rax], ecx
%define rsp esp
%define rbp ebp
%define movsxd mov
+; Compatibility shim for nasm: yasm accepts `movq reg32, mm/xmm` (and the
+; reverse), but nasm rejects movq with a 32-bit GPR operand and requires
+; movd instead. On 32-bit builds (where the file's %defines above map the
+; 64-bit register names to their 32-bit forms) this macro intercepts movq
+; and, when either operand is a 32-bit GPR, emits movd; any other operand
+; combination falls through to the real movq in the %else arm.
+; NOTE(review): the %ifidn chain matches register names textually, so it
+; only covers the eight plain 32-bit GPR spellings listed — presumably
+; sufficient for this file's usage; verify if new registers are introduced.
+%macro movq 2
+  %ifidn %1,eax
+    movd %1,%2
+  %elifidn %2,eax
+    movd %1,%2
+  %elifidn %1,ebx
+    movd %1,%2
+  %elifidn %2,ebx
+    movd %1,%2
+  %elifidn %1,ecx
+    movd %1,%2
+  %elifidn %2,ecx
+    movd %1,%2
+  %elifidn %1,edx
+    movd %1,%2
+  %elifidn %2,edx
+    movd %1,%2
+  %elifidn %1,esi
+    movd %1,%2
+  %elifidn %2,esi
+    movd %1,%2
+  %elifidn %1,edi
+    movd %1,%2
+  %elifidn %2,edi
+    movd %1,%2
+  %elifidn %1,esp
+    movd %1,%2
+  %elifidn %2,esp
+    movd %1,%2
+  %elifidn %1,ebp
+    movd %1,%2
+  %elifidn %2,ebp
+    movd %1,%2
+  %else
+    ; neither operand is a 32-bit GPR: use the genuine movq instruction
+    movq %1,%2
+  %endif
+%endmacro
%endif