x264_pixel_avg_w4_mmxext:
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
- movsxd rax, parm7d ; i_height
+ mov eax, parm7d ; i_height
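+ ; a 32-bit mov already zero-extends into rax, and i_height is positive,
+ ; so the movsxd sign-extension was unneeded; the 32-bit form is shorter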
ALIGN 4
.height_loop
movd mm0, [parm3q]
- pavgb mm0, [r10]
movd mm1, [parm3q+parm4q]
+ pavgb mm0, [r10]
pavgb mm1, [r10+r11]
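+ ; the second row's load is issued before the first pavgb so the two
+ ; rows' dependency chains can overlap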
movd [parm1q], mm0
movd [parm1q+parm2q], mm1
- dec rax
- dec rax
+ sub eax, byte 2
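+ ; the leas below leave flags untouched, so jne still tests this sub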
lea parm3q, [parm3q+parm4q*2]
lea r10, [r10+r11*2]
lea parm1q, [parm1q+parm2q*2]
jne .height_loop
-
- ret
+ rep ret
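+ ; the rep prefix is ignored before ret, but the two-byte return avoids
+ ; AMD K8's misprediction penalty for a one-byte ret reached by a branch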
; int i_height );
;-----------------------------------------------------------------------------
x264_pixel_avg_w8_mmxext:
-
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
- movsxd rax, parm7d ; i_height
+ mov eax, parm7d ; i_height
ALIGN 4
.height_loop
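+ ; unrolled to process two rows per iteration, matching avg_w4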
movq mm0, [parm3q]
+ movq mm1, [parm3q+parm4q]
pavgb mm0, [r10]
+ pavgb mm1, [r10+r11]
movq [parm1q], mm0
- dec rax
- lea parm3q, [parm3q+parm4q]
- lea r10, [r10+r11]
- lea parm1q, [parm1q+parm2q]
+ movq [parm1q+parm2q], mm1
+ sub eax, byte 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea r10, [r10+r11*2]
+ lea parm1q, [parm1q+parm2q*2]
jne .height_loop
-
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_mmxext:
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
- movsxd rax, parm7d ; i_height
+ mov eax, parm7d ; i_height
ALIGN 4
.height_loop
movq mm0, [parm3q ]
movq mm1, [parm3q+8]
pavgb mm0, [r10 ]
pavgb mm1, [r10+8]
movq [parm1q ], mm0
movq [parm1q+8], mm1
- dec rax
+ dec eax
lea parm3q, [parm3q+parm4q]
lea r10, [r10+r11]
lea parm1q, [parm1q+parm2q]
jne .height_loop
-
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
x264_pixel_avg_w16_sse2:
mov r10, parm5q ; src2
movsxd r11, parm6d ; i_src2_stride
- movsxd rax, parm7d ; i_height
+ mov eax, parm7d ; i_height
ALIGN 4
.height_loop
movdqu xmm0, [parm3q]
pavgb xmm0, [r10]
movdqu [parm1q], xmm0
-
- dec rax
+ dec eax ; keep the loop counter 32-bit, as in the other functions
lea parm3q, [parm3q+parm4q]
lea r10, [r10+r11]
lea parm1q, [parm1q+parm2q]
jne .height_loop
-
- ret
+ rep ret
add parm3q, parm4q
dec r11d
jnz .height_loop
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
x264_pixel_avg_weight_w8_mmxext:
BIWEIGHT_START_MMX
ALIGN 4
.height_loop
- BIWEIGHT_4P_MMX [parm1q ], [parm3q ]
- BIWEIGHT_4P_MMX [parm1q+4 ], [parm3q+4 ]
- BIWEIGHT_4P_MMX [parm1q+parm2q ], [parm3q+parm4q ]
- BIWEIGHT_4P_MMX [parm1q+parm2q+4], [parm3q+parm4q+4]
+ BIWEIGHT_4P_MMX [parm1q ], [parm3q ]
+ BIWEIGHT_4P_MMX [parm1q+4], [parm3q+4]
- lea parm1q, [parm1q+parm2q*2]
- lea parm3q, [parm3q+parm4q*2]
- sub r11d, byte 2
+ add parm1q, parm2q
+ add parm3q, parm4q
+ dec r11d
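+ ; now one row (two 4-pixel halves) per iteration instead of two rows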
jnz .height_loop
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
- dec eax
- dec eax
+ sub eax, byte 2
jne .height_loop
-
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
sub eax, byte 4
jnz .height_loop
-
- ret
+ rep ret
ALIGN 16
;-----------------------------------------------------------------------------
lea parm1q, [parm1q+parm2q*4]
sub eax, byte 4
jnz .height_loop
-
- ret
+ rep ret
ALIGN 16
movdqu xmm1, [parm3q+parm4q]
movdqu [parm1q], xmm0
movdqu [parm1q+parm2q], xmm1
- dec eax
- dec eax
+ sub eax, byte 2
lea parm3q, [parm3q+parm4q*2]
lea parm1q, [parm1q+parm2q*2]
jnz .height_loop
-
- ret
+ rep ret