%elifidn __OUTPUT_FORMAT__,elf64
global %1:function hidden
%elifidn __OUTPUT_FORMAT__,macho32
- global %1:private_extern
+ %ifdef __NASM_VER__
+ global %1
+ %else
+ global %1:private_extern
+ %endif
%elifidn __OUTPUT_FORMAT__,macho64
- global %1:private_extern
+ %ifdef __NASM_VER__
+ global %1
+ %else
+ global %1:private_extern
+ %endif
%else
global %1
%endif
mov rcx, 0x0400040
movdqa xmm4, [rdx] ;load filters
- movd xmm5, rcx
+ movq xmm5, rcx
packsswb xmm4, xmm4
pshuflw xmm0, xmm4, 0b ;k0_k1
pshuflw xmm1, xmm4, 01010101b ;k2_k3
mov rcx, 0x0400040
movdqa xmm4, [rdx] ;load filters
- movd xmm5, rcx
+ movq xmm5, rcx
packsswb xmm4, xmm4
pshuflw xmm0, xmm4, 0b ;k0_k1
pshuflw xmm1, xmm4, 01010101b ;k2_k3
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
%ifidn %1, b_32x32
- pmovmskb r6, m7
- pmovmskb r2, m12
+ pmovmskb r6d, m7
+ pmovmskb r2d, m12
or r6, r2
jz .skip_iter
%endif
%ifidn %1, fp_32x32
pcmpgtw m7, m6, m0
pcmpgtw m12, m11, m0
- pmovmskb r6, m7
- pmovmskb r2, m12
+ pmovmskb r6d, m7
+ pmovmskb r2d, m12
or r6, r2
jz .skip_iter
pshufd m4, m6, 0x1
movd [r1], m7 ; store sse
paddd m6, m4
- movd rax, m6 ; store sum as return value
+ movd raxd, m6 ; store sum as return value
%else ; mmsize == 8
pshufw m4, m6, 0xe
pshufw m3, m7, 0xe
movd [r1], m7 ; store sse
pshufw m4, m6, 0xe
paddd m6, m4
- movd rax, m6 ; store sum as return value
+ movd raxd, m6 ; store sum as return value
%endif
RET
%endmacro