;=============================================================================
;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
+; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                uint8_t *src, int src_stride,
+;                                int height );
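+;
+; overwrites dst with the rounded byte-wise average of dst and src:
+;   dst[x] = ( dst[x] + src[x] + 1 ) >> 1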
;-----------------------------------------------------------------------------
cglobal x264_pixel_avg_w4_mmxext
- mov r10, parm5q ; src2
- movsxd r11, parm6d ; i_src2_stride
- mov eax, parm7d ; i_height
-
-ALIGN 4
-.height_loop
- movd mm0, [parm3q]
- movd mm1, [parm3q+parm4q]
- pavgb mm0, [r10]
- pavgb mm1, [r10+r11]
- movd [parm1q], mm0
- movd [parm1q+parm2q], mm1
- sub eax, 2
- lea parm3q, [parm3q+parm4q*2]
- lea r10, [r10+r11*2]
- lea parm1q, [parm1q+parm2q*2]
- jg .height_loop
+.height_loop:
+ movd mm0, [parm3q]
+ movd mm1, [parm3q+parm4q]
+ pavgb mm0, [parm1q]
+ pavgb mm1, [parm1q+parm2q]
+ movd [parm1q], mm0
+ movd [parm1q+parm2q], mm1
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
rep ret
-
-
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w8_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
cglobal x264_pixel_avg_w8_mmxext
- mov r10, parm5q ; src2
- movsxd r11, parm6d ; i_src2_stride
- mov eax, parm7d ; i_height
-
-ALIGN 4
-.height_loop
- movq mm0, [parm3q]
- movq mm1, [parm3q+parm4q]
- pavgb mm0, [r10]
- pavgb mm1, [r10+r11]
- movq [parm1q], mm0
- movq [parm1q+parm2q], mm1
- sub eax, 2
- lea parm3q, [parm3q+parm4q*2]
- lea r10, [r10+r11*2]
- lea parm1q, [parm1q+parm2q*2]
- jg .height_loop
+.height_loop:
+ movq mm0, [parm3q]
+ movq mm1, [parm3q+parm4q]
+ pavgb mm0, [parm1q]
+ pavgb mm1, [parm1q+parm2q]
+ movq [parm1q], mm0
+ movq [parm1q+parm2q], mm1
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
rep ret
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
cglobal x264_pixel_avg_w16_mmxext
- mov r10, parm5q ; src2
- movsxd r11, parm6d ; i_src2_stride
- mov eax, parm7d ; i_height
+.height_loop:
+ movq mm0, [parm3q ]
+ movq mm1, [parm3q+8]
+ movq mm2, [parm3q+parm4q ]
+ movq mm3, [parm3q+parm4q+8]
+ pavgb mm0, [parm1q ]
+ pavgb mm1, [parm1q+8]
+ pavgb mm2, [parm1q+parm2q ]
+ pavgb mm3, [parm1q+parm2q+8]
+ movq [parm1q ], mm0
+ movq [parm1q+8], mm1
+ movq [parm1q+parm2q ], mm2
+ movq [parm1q+parm2q+8], mm3
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
+ rep ret
-ALIGN 4
-.height_loop
- movq mm0, [parm3q ]
- movq mm1, [parm3q+8]
- pavgb mm0, [r10 ]
- pavgb mm1, [r10+8]
- movq [parm1q ], mm0
- movq [parm1q+8], mm1
- dec eax
- lea parm3q, [parm3q+parm4q]
- lea r10, [r10+r11]
- lea parm1q, [parm1q+parm2q]
- jg .height_loop
+cglobal x264_pixel_avg_w16_sse2
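+ ; unaligned loads from src; dst rows are assumed 16-byte aligned
+ ; (required by the pavgb memory operands and the movdqa stores)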
+.height_loop:
+ movdqu xmm0, [parm3q]
+ movdqu xmm1, [parm3q+parm4q]
+ pavgb xmm0, [parm1q]
+ pavgb xmm1, [parm1q+parm2q]
+ movdqa [parm1q], xmm0
+ movdqa [parm1q+parm2q], xmm1
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
rep ret
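+
+; Fixed-size entry points: each one loads the row count into temp1d and
+; tail-jumps into the matching width-generic loop above.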
+%macro AVGH 2
+cglobal x264_pixel_avg_%1x%2_mmxext
+ mov temp1d, %2
+ jmp x264_pixel_avg_w%1_mmxext
+%endmacro
+
+AVGH 16, 16
+AVGH 16, 8
+AVGH 8, 16
+AVGH 8, 8
+AVGH 8, 4
+AVGH 4, 8
+AVGH 4, 4
+AVGH 4, 2
+
;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w20_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
+; void x264_pixel_avg2_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                 uint8_t *src1, int src_stride,
+;                                 uint8_t *src2, int height );
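+;
+; each output pixel is the rounded average of the two sources (pavgb):
+;   dst[x] = ( src1[x] + src2[x] + 1 ) >> 1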
;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w20_mmxext
- mov r10, parm5q ; src2
- movsxd r11, parm6d ; i_src2_stride
- mov eax, parm7d ; i_height
+%macro AVG2_START 0
+%ifdef WIN64
+ mov temp1d, parm6d
+ mov temp2q, parm5q
+%endif
+ sub parm5q, parm3q
+%endmacro
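+
+; AVG2_START turns src2 into an offset from src1 (sub parm5q, parm3q), so the
+; loops below read the second source relative to parm3q and only one source
+; pointer has to be advanced per row.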
-ALIGN 4
-.height_loop
- movq mm0, [parm3q ]
- movq mm1, [parm3q+8 ]
- movd mm2, [parm3q+16]
- pavgb mm0, [r10 ]
- pavgb mm1, [r10+8 ]
- pavgb mm2, [r10+16]
- movq [parm1q ], mm0
- movq [parm1q+8 ], mm1
- movd [parm1q+16], mm2
- dec eax
- lea parm3q, [parm3q+parm4q]
- lea r10, [r10+r11]
- lea parm1q, [parm1q+parm2q]
- jg .height_loop
+cglobal x264_pixel_avg2_w4_mmxext
+ AVG2_START
+ lea r10, [temp2q+parm4q]
+.height_loop:
+ movd mm0, [parm3q]
+ movd mm1, [parm3q+parm4q]
+ pavgb mm0, [parm3q+temp2q]
+ pavgb mm1, [parm3q+r10]
+ movd [parm1q], mm0
+ movd [parm1q+parm2q], mm1
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
rep ret
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_sse2( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_sse2
- mov r10, parm5q ; src2
- movsxd r11, parm6d ; i_src2_stride
- mov eax, parm7d ; i_height
+cglobal x264_pixel_avg2_w8_mmxext
+ AVG2_START
+ lea r10, [temp2q+parm4q]
+.height_loop:
+ movq mm0, [parm3q]
+ movq mm1, [parm3q+parm4q]
+ pavgb mm0, [parm3q+temp2q]
+ pavgb mm1, [parm3q+r10]
+ movq [parm1q], mm0
+ movq [parm1q+parm2q], mm1
+ sub temp1d, 2
+ lea parm3q, [parm3q+parm4q*2]
+ lea parm1q, [parm1q+parm2q*2]
+ jg .height_loop
+ rep ret
-ALIGN 4
-.height_loop
- movdqu xmm0, [parm3q]
- pavgb xmm0, [r10]
- movdqu [parm1q], xmm0
- dec rax
- lea parm3q, [parm3q+parm4q]
- lea r10, [r10+r11]
- lea parm1q, [parm1q+parm2q]
- jg .height_loop
+cglobal x264_pixel_avg2_w16_mmxext
+ AVG2_START
+.height_loop:
+ movq mm0, [parm3q]
+ movq mm1, [parm3q+8]
+ pavgb mm0, [parm3q+temp2q]
+ pavgb mm1, [parm3q+temp2q+8]
+ movq [parm1q], mm0
+ movq [parm1q+8], mm1
+ add parm3q, parm4q
+ add parm1q, parm2q
+ dec temp1d
+ jg .height_loop
+ rep ret
+
+cglobal x264_pixel_avg2_w20_mmxext
+ AVG2_START
+.height_loop:
+ movq mm0, [parm3q]
+ movq mm1, [parm3q+8]
+ movd mm2, [parm3q+16]
+ pavgb mm0, [parm3q+temp2q]
+ pavgb mm1, [parm3q+temp2q+8]
+ pavgb mm2, [parm3q+temp2q+16]
+ movq [parm1q], mm0
+ movq [parm1q+8], mm1
+ movd [parm1q+16], mm2
+ add parm3q, parm4q
+ add parm1q, parm2q
+ dec temp1d
+ jg .height_loop
rep ret
;=============================================================================
;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
+; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                uint8_t *src, int src_stride,
+;                                int height );
;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w4_mmxext
- push ebp
- push ebx
- push esi
- push edi
-
- mov edi, [esp+20] ; dst
- mov ebx, [esp+28] ; src1
- mov ecx, [esp+36] ; src2
- mov esi, [esp+24] ; i_dst_stride
- mov eax, [esp+32] ; i_src1_stride
- mov edx, [esp+40] ; i_src2_stride
- mov ebp, [esp+44] ; i_height
-ALIGN 4
-.height_loop
- movd mm0, [ebx]
- pavgb mm0, [ecx]
- movd mm1, [ebx+eax]
- pavgb mm1, [ecx+edx]
- movd [edi], mm0
- movd [edi+esi], mm1
- dec ebp
- dec ebp
- lea ebx, [ebx+eax*2]
- lea ecx, [ecx+edx*2]
- lea edi, [edi+esi*2]
- jg .height_loop
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-
-
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w8_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w8_mmxext
- push ebp
- push ebx
- push esi
- push edi
-
- mov edi, [esp+20] ; dst
- mov ebx, [esp+28] ; src1
- mov ecx, [esp+36] ; src2
- mov esi, [esp+24] ; i_dst_stride
- mov eax, [esp+32] ; i_src1_stride
- mov edx, [esp+40] ; i_src2_stride
- mov ebp, [esp+44] ; i_height
-ALIGN 4
-.height_loop
- movq mm0, [ebx]
- pavgb mm0, [ecx]
- movq [edi], mm0
- dec ebp
- lea ebx, [ebx+eax]
- lea ecx, [ecx+edx]
- lea edi, [edi+esi]
- jg .height_loop
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
-
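+; AVG_START/AVG_END bracket the per-width loop bodies. The AVGH wrappers
+; further down push esi and load the block height into it before jumping
+; here; together with the push of ebx this puts the first argument at
+; [esp+12].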
+%macro AVG_START 1
+cglobal %1
+ push ebx
+ mov eax, [esp+12] ; dst
+ mov ebx, [esp+16] ; dst_stride
+ mov ecx, [esp+20] ; src
+ mov edx, [esp+24] ; src_stride
+ ; esi = height (set by the AVGH wrappers before they jump here)
+.height_loop:
+%endmacro
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_mmxext
- push ebp
- push ebx
- push esi
- push edi
-
- mov edi, [esp+20] ; dst
- mov ebx, [esp+28] ; src1
- mov ecx, [esp+36] ; src2
- mov esi, [esp+24] ; i_dst_stride
- mov eax, [esp+32] ; i_src1_stride
- mov edx, [esp+40] ; i_src2_stride
- mov ebp, [esp+44] ; i_height
-ALIGN 4
-.height_loop
- movq mm0, [ebx ]
- movq mm1, [ebx+8]
- pavgb mm0, [ecx ]
- pavgb mm1, [ecx+8]
- movq [edi ], mm0
- movq [edi+8], mm1
- dec ebp
- lea ebx, [ebx+eax]
- lea ecx, [ecx+edx]
- lea edi, [edi+esi]
- jg .height_loop
-
- pop edi
- pop esi
- pop ebx
- pop ebp
+%macro AVG_END 0
+ sub esi, 2
+ lea eax, [eax+ebx*2]
+ lea ecx, [ecx+edx*2]
+ jg .height_loop
+ pop ebx
+ pop esi
ret
+%endmacro
+AVG_START x264_pixel_avg_w4_mmxext
+ movd mm0, [ecx]
+ movd mm1, [ecx+edx]
+ pavgb mm0, [eax]
+ pavgb mm1, [eax+ebx]
+ movd [eax], mm0
+ movd [eax+ebx], mm1
+AVG_END
+
+AVG_START x264_pixel_avg_w8_mmxext
+ movq mm0, [ecx]
+ movq mm1, [ecx+edx]
+ pavgb mm0, [eax]
+ pavgb mm1, [eax+ebx]
+ movq [eax], mm0
+ movq [eax+ebx], mm1
+AVG_END
+
+AVG_START x264_pixel_avg_w16_mmxext
+ movq mm0, [ecx]
+ movq mm1, [ecx+8]
+ movq mm2, [ecx+edx]
+ movq mm3, [ecx+edx+8]
+ pavgb mm0, [eax]
+ pavgb mm1, [eax+8]
+ pavgb mm2, [eax+ebx]
+ pavgb mm3, [eax+ebx+8]
+ movq [eax], mm0
+ movq [eax+8], mm1
+ movq [eax+ebx], mm2
+ movq [eax+ebx+8], mm3
+AVG_END
+
+AVG_START x264_pixel_avg_w16_sse2
+ movdqu xmm0, [ecx]
+ movdqu xmm1, [ecx+edx]
+ pavgb xmm0, [eax]
+ pavgb xmm1, [eax+ebx]
+ movdqa [eax], xmm0
+ movdqa [eax+ebx], xmm1
+AVG_END
+
+%macro AVGH 2
+cglobal x264_pixel_avg_%1x%2_mmxext
+ push esi
+ mov esi, %2
+ jmp x264_pixel_avg_w%1_mmxext
+%endmacro
+AVGH 16, 16
+AVGH 16, 8
+AVGH 8, 16
+AVGH 8, 8
+AVGH 8, 4
+AVGH 4, 8
+AVGH 4, 4
+AVGH 4, 2
+
+%macro AVG2_START 1
+cglobal %1
+ push ebx
+ push esi
+ push edi
+ push ebp
+ mov eax, [esp+20] ; dst
+ mov ebx, [esp+24] ; dst_stride
+ mov ecx, [esp+28] ; src1
+ mov edx, [esp+32] ; src_stride
+ mov edi, [esp+36] ; src2
+ mov esi, [esp+40] ; height
+ sub edi, ecx
+ lea ebp, [edi+edx]
+.height_loop:
+%endmacro
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w20_mmxext( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w20_mmxext
- push ebp
- push ebx
- push esi
- push edi
-
- mov edi, [esp+20] ; dst
- mov ebx, [esp+28] ; src1
- mov ecx, [esp+36] ; src2
- mov esi, [esp+24] ; i_dst_stride
- mov eax, [esp+32] ; i_src1_stride
- mov edx, [esp+40] ; i_src2_stride
- mov ebp, [esp+44] ; i_height
-ALIGN 4
-.height_loop
- movq mm0, [ebx ]
- movq mm1, [ebx+8 ]
- movd mm2, [ebx+16]
- pavgb mm0, [ecx ]
- pavgb mm1, [ecx+8 ]
- pavgb mm2, [ecx+16]
- movq [edi ], mm0
- movq [edi+8 ], mm1
- movd [edi+16], mm2
- dec ebp
- lea ebx, [ebx+eax]
- lea ecx, [ecx+edx]
- lea edi, [edi+esi]
- jg .height_loop
-
- pop edi
- pop esi
- pop ebx
- pop ebp
+%macro AVG2_END 0
+ sub esi, 2
+ lea eax, [eax+ebx*2]
+ lea ecx, [ecx+edx*2]
+ jg .height_loop
+ pop ebp
+ pop edi
+ pop esi
+ pop ebx
ret
+%endmacro
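+
+; within the AVG2 loops, edi = src2 - src1 and ebp = edi + src_stride, so
+; [ecx+edi] and [ecx+ebp] address the two src2 rows handled per iteration.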
+AVG2_START x264_pixel_avg2_w4_mmxext
+ movd mm0, [ecx]
+ movd mm1, [ecx+edx]
+ pavgb mm0, [ecx+edi]
+ pavgb mm1, [ecx+ebp]
+ movd [eax], mm0
+ movd [eax+ebx], mm1
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w8_mmxext
+ movq mm0, [ecx]
+ movq mm1, [ecx+edx]
+ pavgb mm0, [ecx+edi]
+ pavgb mm1, [ecx+ebp]
+ movq [eax], mm0
+ movq [eax+ebx], mm1
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w16_mmxext
+ movq mm0, [ecx]
+ movq mm1, [ecx+8]
+ movq mm2, [ecx+edx]
+ movq mm3, [ecx+edx+8]
+ pavgb mm0, [ecx+edi]
+ pavgb mm1, [ecx+edi+8]
+ pavgb mm2, [ecx+ebp]
+ pavgb mm3, [ecx+ebp+8]
+ movq [eax], mm0
+ movq [eax+8], mm1
+ movq [eax+ebx], mm2
+ movq [eax+ebx+8], mm3
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w20_mmxext
+ movq mm0, [ecx]
+ movq mm1, [ecx+8]
+ movd mm2, [ecx+16]
+ movq mm3, [ecx+edx]
+ movq mm4, [ecx+edx+8]
+ movd mm5, [ecx+edx+16]
+ pavgb mm0, [ecx+edi]
+ pavgb mm1, [ecx+edi+8]
+ pavgb mm2, [ecx+edi+16]
+ pavgb mm3, [ecx+ebp]
+ pavgb mm4, [ecx+ebp+8]
+ pavgb mm5, [ecx+ebp+16]
+ movq [eax], mm0
+ movq [eax+8], mm1
+ movd [eax+16], mm2
+ movq [eax+ebx], mm3
+ movq [eax+ebx+8], mm4
+ movd [eax+ebx+16], mm5
+AVG2_END
-ALIGN 16
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_sse2( uint8_t *dst, int i_dst_stride,
-; uint8_t *src1, int i_src1_stride,
-; uint8_t *src2, int i_src2_stride,
-; int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_sse2
- push ebp
- push ebx
- push esi
- push edi
-
- mov edi, [esp+20] ; dst
- mov ebx, [esp+28] ; src1
- mov ecx, [esp+36] ; src2
- mov esi, [esp+24] ; i_dst_stride
- mov eax, [esp+32] ; i_src1_stride
- mov edx, [esp+40] ; i_src2_stride
- mov ebp, [esp+44] ; i_height
-ALIGN 4
-.height_loop
- movdqu xmm0, [ebx]
- pavgb xmm0, [ecx]
- movdqu [edi], xmm0
-
- dec ebp
- lea ebx, [ebx+eax]
- lea ecx, [ecx+edx]
- lea edi, [edi+esi]
- jg .height_loop
-
- pop edi
- pop esi
- pop ebx
- pop ebp
- ret
-
;=============================================================================
; weighted prediction
#include "common/common.h"
/* NASM functions */
-extern void x264_pixel_avg_w4_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w8_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w16_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w20_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w16_sse2( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
+extern void x264_pixel_avg_16x16_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_16x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x16_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x4_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x4_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x2_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w4_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w8_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w16_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w20_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
extern void x264_hpel_filter_mmxext( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
int i_stride, int i_width, int i_height );
-#define AVG(W,H) \
-static void x264_pixel_avg_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src ) \
-{ \
- x264_pixel_avg_w ## W ## _mmxext( dst, i_dst, dst, i_dst, src, i_src, H ); \
-}
-AVG(16,16)
-AVG(16,8)
-AVG(8,16)
-AVG(8,8)
-AVG(8,4)
-AVG(4,8)
-AVG(4,4)
-AVG(4,2)
-
#define AVG_WEIGHT(W,H) \
void x264_pixel_avg_weight_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_weight_dst ) \
{ \
AVG_WEIGHT(8,8)
AVG_WEIGHT(8,4)
-static void (* const x264_pixel_avg_wtab_mmxext[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ) =
+static void (* const x264_pixel_avg_wtab_mmxext[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
{
NULL,
- x264_pixel_avg_w4_mmxext,
- x264_pixel_avg_w8_mmxext,
- x264_pixel_avg_w16_mmxext,
- x264_pixel_avg_w16_mmxext,
- x264_pixel_avg_w20_mmxext,
+ x264_pixel_avg2_w4_mmxext,
+ x264_pixel_avg2_w8_mmxext,
+ x264_pixel_avg2_w16_mmxext,
+ x264_pixel_avg2_w16_mmxext,
+ x264_pixel_avg2_w20_mmxext,
};
static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int, int ) =
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_mmxext[i_width>>2](
dst, i_dst_stride, src1, i_src_stride,
- src2, i_src_stride, i_height );
+ src2, i_height );
}
else
{
uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
x264_pixel_avg_wtab_mmxext[i_width>>2](
dst, *i_dst_stride, src1, i_src_stride,
- src2, i_src_stride, i_height );
+ src2, i_height );
return dst;
}
else
void x264_mc_mmxext_init( x264_mc_functions_t *pf )
{
- pf->mc_luma = mc_luma_mmx;
- pf->get_ref = get_ref_mmx;
+ pf->mc_luma = mc_luma_mmx;
+ pf->get_ref = get_ref_mmx;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_mmxext;
void x264_mc_sse2_init( x264_mc_functions_t *pf )
{
/* todo: use sse2 */
- pf->mc_luma = mc_luma_mmx;
- pf->get_ref = get_ref_mmx;
}