From: Loren Merritt
Date: Mon, 19 Nov 2007 17:08:07 +0000 (+0000)
Subject: separate pixel_avg into cases for mc and for bipred
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=94c55d8cc75ea88987ec6766d83ea0dd0aa7384f;p=libx264

separate pixel_avg into cases for mc and for bipred

git-svn-id: svn://svn.videolan.org/x264/trunk@693 df754926-b1dd-0310-bc7b-ec298dee348c
---

diff --git a/common/amd64/mc-a.asm b/common/amd64/mc-a.asm
index f1b4bea0..cafce508 100644
--- a/common/amd64/mc-a.asm
+++ b/common/amd64/mc-a.asm
@@ -61,135 +61,163 @@ SECTION .text
 ;=============================================================================
 
 ;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w4_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                uint8_t *src1, int i_src1_stride,
-;                                uint8_t *src2, int i_src2_stride,
-;                                int i_height );
+; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                uint8_t *src, int src_stride,
+;                                int height );
 ;-----------------------------------------------------------------------------
 cglobal x264_pixel_avg_w4_mmxext
-    mov     r10, parm5q         ; src2
-    movsxd  r11, parm6d         ; i_src2_stride
-    mov     eax, parm7d         ; i_height
-
-ALIGN 4
-.height_loop
-    movd    mm0, [parm3q]
-    movd    mm1, [parm3q+parm4q]
-    pavgb   mm0, [r10]
-    pavgb   mm1, [r10+r11]
-    movd    [parm1q], mm0
-    movd    [parm1q+parm2q], mm1
-    sub     eax, 2
-    lea     parm3q, [parm3q+parm4q*2]
-    lea     r10, [r10+r11*2]
-    lea     parm1q, [parm1q+parm2q*2]
-    jg      .height_loop
+.height_loop:
+    movd    mm0, [parm3q]
+    movd    mm1, [parm3q+parm4q]
+    pavgb   mm0, [parm1q]
+    pavgb   mm1, [parm1q+parm2q]
+    movd    [parm1q], mm0
+    movd    [parm1q+parm2q], mm1
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
     rep ret
-
-
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w8_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                uint8_t *src1, int i_src1_stride,
-;                                uint8_t *src2, int i_src2_stride,
-;                                int i_height );
-;-----------------------------------------------------------------------------
 cglobal x264_pixel_avg_w8_mmxext
-    mov     r10, parm5q         ; src2
-    movsxd  r11, parm6d         ; i_src2_stride
-    mov     eax, parm7d         ; i_height
-
-ALIGN 4
-.height_loop
-    movq    mm0, [parm3q]
-    movq    mm1, [parm3q+parm4q]
-    pavgb   mm0, [r10]
-    pavgb   mm1, [r10+r11]
-    movq    [parm1q], mm0
-    movq    [parm1q+parm2q], mm1
-    sub     eax, 2
-    lea     parm3q, [parm3q+parm4q*2]
-    lea     r10, [r10+r11*2]
-    lea     parm1q, [parm1q+parm2q*2]
-    jg      .height_loop
+.height_loop:
+    movq    mm0, [parm3q]
+    movq    mm1, [parm3q+parm4q]
+    pavgb   mm0, [parm1q]
+    pavgb   mm1, [parm1q+parm2q]
+    movq    [parm1q], mm0
+    movq    [parm1q+parm2q], mm1
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
     rep ret
 
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                 uint8_t *src1, int i_src1_stride,
-;                                 uint8_t *src2, int i_src2_stride,
-;                                 int i_height );
-;-----------------------------------------------------------------------------
 cglobal x264_pixel_avg_w16_mmxext
-    mov     r10, parm5q         ; src2
-    movsxd  r11, parm6d         ; i_src2_stride
-    mov     eax, parm7d         ; i_height
+.height_loop:
+    movq    mm0, [parm3q ]
+    movq    mm1, [parm3q+8]
+    movq    mm2, [parm3q+parm4q ]
+    movq    mm3, [parm3q+parm4q+8]
+    pavgb   mm0, [parm1q ]
+    pavgb   mm1, [parm1q+8]
+    pavgb   mm2, [parm1q+parm2q ]
+    pavgb   mm3, [parm1q+parm2q+8]
+    movq    [parm1q ], mm0
+    movq    [parm1q+8], mm1
+    movq    [parm1q+parm2q ], mm2
+    movq    [parm1q+parm2q+8], mm3
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
+    rep ret
 
-ALIGN 4
-.height_loop
-    movq    mm0, [parm3q ]
-    movq    mm1, [parm3q+8]
-    pavgb   mm0, [r10 ]
-    pavgb   mm1, [r10+8]
-    movq    [parm1q ], mm0
-    movq    [parm1q+8], mm1
-    dec     eax
-    lea     parm3q, [parm3q+parm4q]
-    lea     r10, [r10+r11]
-    lea     parm1q, [parm1q+parm2q]
-    jg      .height_loop
+cglobal x264_pixel_avg_w16_sse2
+.height_loop:
+    movdqu  xmm0, [parm3q]
+    movdqu  xmm1, [parm3q+parm4q]
+    pavgb   xmm0, [parm1q]
+    pavgb   xmm1, [parm1q+parm2q]
+    movdqa  [parm1q], xmm0
+    movdqa  [parm1q+parm2q], xmm1
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
     rep ret
 
+%macro AVGH 2
+cglobal x264_pixel_avg_%1x%2_mmxext
+    mov     temp1d, %2
+    jmp     x264_pixel_avg_w%1_mmxext
+%endmacro
+
+AVGH 16, 16
+AVGH 16, 8
+AVGH 8, 16
+AVGH 8, 8
+AVGH 8, 4
+AVGH 4, 8
+AVGH 4, 4
+AVGH 4, 2
+
 ;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w20_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                 uint8_t *src1, int i_src1_stride,
-;                                 uint8_t *src2, int i_src2_stride,
-;                                 int i_height );
+; void x264_pixel_avg2_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                 uint8_t *src1, int src_stride,
+;                                 uint8_t *src2, int height );
 ;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w20_mmxext
-    mov     r10, parm5q         ; src2
-    movsxd  r11, parm6d         ; i_src2_stride
-    mov     eax, parm7d         ; i_height
+%macro AVG2_START 0
+%ifdef WIN64
+    mov     temp1d, parm6d
+    mov     temp2q, parm5q
+%endif
+    sub     parm5q, parm3q
+%endmacro
 
-ALIGN 4
-.height_loop
-    movq    mm0, [parm3q ]
-    movq    mm1, [parm3q+8 ]
-    movd    mm2, [parm3q+16]
-    pavgb   mm0, [r10 ]
-    pavgb   mm1, [r10+8 ]
-    pavgb   mm2, [r10+16]
-    movq    [parm1q ], mm0
-    movq    [parm1q+8 ], mm1
-    movd    [parm1q+16], mm2
-    dec     eax
-    lea     parm3q, [parm3q+parm4q]
-    lea     r10, [r10+r11]
-    lea     parm1q, [parm1q+parm2q]
-    jg      .height_loop
+cglobal x264_pixel_avg2_w4_mmxext
+    AVG2_START
+    lea     r10, [temp2q+parm4q]
+.height_loop:
+    movd    mm0, [parm3q]
+    movd    mm1, [parm3q+parm4q]
+    pavgb   mm0, [parm3q+temp2q]
+    pavgb   mm1, [parm3q+r10]
+    movd    [parm1q], mm0
+    movd    [parm1q+parm2q], mm1
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
     rep ret
 
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_sse2( uint8_t *dst,  int i_dst_stride,
-;                               uint8_t *src1, int i_src1_stride,
-;                               uint8_t *src2, int i_src2_stride,
-;                               int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_sse2
-    mov     r10, parm5q         ; src2
-    movsxd  r11, parm6d         ; i_src2_stride
-    mov     eax, parm7d         ; i_height
+cglobal x264_pixel_avg2_w8_mmxext
+    AVG2_START
+    lea     r10, [temp2q+parm4q]
+.height_loop:
+    movq    mm0, [parm3q]
+    movq    mm1, [parm3q+parm4q]
+    pavgb   mm0, [parm3q+temp2q]
+    pavgb   mm1, [parm3q+r10]
+    movq    [parm1q], mm0
+    movq    [parm1q+parm2q], mm1
+    sub     temp1d, 2
+    lea     parm3q, [parm3q+parm4q*2]
+    lea     parm1q, [parm1q+parm2q*2]
+    jg      .height_loop
+    rep ret
 
-ALIGN 4
-.height_loop
-    movdqu  xmm0, [parm3q]
-    pavgb   xmm0, [r10]
-    movdqu  [parm1q], xmm0
-    dec     rax
-    lea     parm3q, [parm3q+parm4q]
-    lea     r10, [r10+r11]
-    lea     parm1q, [parm1q+parm2q]
-    jg      .height_loop
+cglobal x264_pixel_avg2_w16_mmxext
+    AVG2_START
+.height_loop:
+    movq    mm0, [parm3q]
+    movq    mm1, [parm3q+8]
+    pavgb   mm0, [parm3q+temp2q]
+    pavgb   mm1, [parm3q+temp2q+8]
+    movq    [parm1q], mm0
+    movq    [parm1q+8], mm1
+    add     parm3q, parm4q
+    add     parm1q, parm2q
+    dec     temp1d
+    jg      .height_loop
+    rep ret
+
+cglobal x264_pixel_avg2_w20_mmxext
+    AVG2_START
+.height_loop:
+    movq    mm0, [parm3q]
+    movq    mm1, [parm3q+8]
+    movd    mm2, [parm3q+16]
+    pavgb   mm0, [parm3q+temp2q]
+    pavgb   mm1, [parm3q+temp2q+8]
+    pavgb   mm2, [parm3q+temp2q+16]
+    movq    [parm1q], mm0
+    movq    [parm1q+8], mm1
+    movd    [parm1q+16], mm2
+    add     parm3q, parm4q
+    add     parm1q, parm2q
+    dec     temp1d
+    jg      .height_loop
     rep ret
diff --git a/common/i386/mc-a.asm b/common/i386/mc-a.asm
index c689a8df..71910739 100644
--- a/common/i386/mc-a.asm
+++ b/common/i386/mc-a.asm
@@ -61,208 +61,173 @@ SECTION .text
 ;=============================================================================
 
 ;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w4_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                uint8_t *src1, int i_src1_stride,
-;                                uint8_t *src2, int i_src2_stride,
-;                                int i_height );
+; void x264_pixel_avg_w4_mmxext( uint8_t *dst, int dst_stride,
+;                                uint8_t *src, int src_stride,
+;                                int height );
 ;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w4_mmxext
-    push    ebp
-    push    ebx
-    push    esi
-    push    edi
-
-    mov     edi, [esp+20]       ; dst
-    mov     ebx, [esp+28]       ; src1
-    mov     ecx, [esp+36]       ; src2
-    mov     esi, [esp+24]       ; i_dst_stride
-    mov     eax, [esp+32]       ; i_src1_stride
-    mov     edx, [esp+40]       ; i_src2_stride
-    mov     ebp, [esp+44]       ; i_height
-ALIGN 4
-.height_loop
-    movd    mm0, [ebx]
-    pavgb   mm0, [ecx]
-    movd    mm1, [ebx+eax]
-    pavgb   mm1, [ecx+edx]
-    movd    [edi], mm0
-    movd    [edi+esi], mm1
-    dec     ebp
-    dec     ebp
-    lea     ebx, [ebx+eax*2]
-    lea     ecx, [ecx+edx*2]
-    lea     edi, [edi+esi*2]
-    jg      .height_loop
-
-    pop     edi
-    pop     esi
-    pop     ebx
-    pop     ebp
-    ret
-
-
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w8_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                uint8_t *src1, int i_src1_stride,
-;                                uint8_t *src2, int i_src2_stride,
-;                                int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w8_mmxext
-    push    ebp
-    push    ebx
-    push    esi
-    push    edi
-
-    mov     edi, [esp+20]       ; dst
-    mov     ebx, [esp+28]       ; src1
-    mov     ecx, [esp+36]       ; src2
-    mov     esi, [esp+24]       ; i_dst_stride
-    mov     eax, [esp+32]       ; i_src1_stride
-    mov     edx, [esp+40]       ; i_src2_stride
-    mov     ebp, [esp+44]       ; i_height
-ALIGN 4
-.height_loop
-    movq    mm0, [ebx]
-    pavgb   mm0, [ecx]
-    movq    [edi], mm0
-    dec     ebp
-    lea     ebx, [ebx+eax]
-    lea     ecx, [ecx+edx]
-    lea     edi, [edi+esi]
-    jg      .height_loop
-
-    pop     edi
-    pop     esi
-    pop     ebx
-    pop     ebp
-    ret
-
-
+%macro AVG_START 1
+cglobal %1
+    push    ebx
+    mov     eax, [esp+12]       ; dst
+    mov     ebx, [esp+16]       ; dst_stride
+    mov     ecx, [esp+20]       ; src
+    mov     edx, [esp+24]       ; src_stride
+    ; esi = height
+.height_loop:
+%endmacro
 
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                 uint8_t *src1, int i_src1_stride,
-;                                 uint8_t *src2, int i_src2_stride,
-;                                 int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_mmxext
-    push    ebp
-    push    ebx
-    push    esi
-    push    edi
-
-    mov     edi, [esp+20]       ; dst
-    mov     ebx, [esp+28]       ; src1
-    mov     ecx, [esp+36]       ; src2
-    mov     esi, [esp+24]       ; i_dst_stride
-    mov     eax, [esp+32]       ; i_src1_stride
-    mov     edx, [esp+40]       ; i_src2_stride
-    mov     ebp, [esp+44]       ; i_height
-ALIGN 4
-.height_loop
-    movq    mm0, [ebx ]
-    movq    mm1, [ebx+8]
-    pavgb   mm0, [ecx ]
-    pavgb   mm1, [ecx+8]
-    movq    [edi ], mm0
-    movq    [edi+8], mm1
-    dec     ebp
-    lea     ebx, [ebx+eax]
-    lea     ecx, [ecx+edx]
-    lea     edi, [edi+esi]
-    jg      .height_loop
-
-    pop     edi
-    pop     esi
-    pop     ebx
-    pop     ebp
+%macro AVG_END 0
+    sub     esi, 2
+    lea     eax, [eax+ebx*2]
+    lea     ecx, [ecx+edx*2]
+    jg      .height_loop
+    pop     ebx
+    pop     esi
     ret
+%endmacro
 
+AVG_START x264_pixel_avg_w4_mmxext
+    movd    mm0, [ecx]
+    movd    mm1, [ecx+edx]
+    pavgb   mm0, [eax]
+    pavgb   mm1, [eax+ebx]
+    movd    [eax], mm0
+    movd    [eax+ebx], mm1
+AVG_END
+
+AVG_START x264_pixel_avg_w8_mmxext
+    movq    mm0, [ecx]
+    movq    mm1, [ecx+edx]
+    pavgb   mm0, [eax]
+    pavgb   mm1, [eax+ebx]
+    movq    [eax], mm0
+    movq    [eax+ebx], mm1
+AVG_END
+
+AVG_START x264_pixel_avg_w16_mmxext
+    movq    mm0, [ecx]
+    movq    mm1, [ecx+8]
+    movq    mm2, [ecx+edx]
+    movq    mm3, [ecx+edx+8]
+    pavgb   mm0, [eax]
+    pavgb   mm1, [eax+8]
+    pavgb   mm2, [eax+ebx]
+    pavgb   mm3, [eax+ebx+8]
+    movq    [eax], mm0
+    movq    [eax+8], mm1
+    movq    [eax+ebx], mm2
+    movq    [eax+ebx+8], mm3
+AVG_END
+
+AVG_START x264_pixel_avg_w16_sse2
+    movdqu  xmm0, [ecx]
+    movdqu  xmm1, [ecx+edx]
+    pavgb   xmm0, [eax]
+    pavgb   xmm1, [eax+ebx]
+    movdqa  [eax], xmm0
+    movdqa  [eax+ebx], xmm1
+AVG_END
+
+%macro AVGH 2
+cglobal x264_pixel_avg_%1x%2_mmxext
+    push    esi
+    mov     esi, %2
+    jmp     x264_pixel_avg_w%1_mmxext
+%endmacro
+AVGH 16, 16
+AVGH 16, 8
+AVGH 8, 16
+AVGH 8, 8
+AVGH 8, 4
+AVGH 4, 8
+AVGH 4, 4
+AVGH 4, 2
+
+%macro AVG2_START 1
+cglobal %1
+    push    ebx
+    push    esi
+    push    edi
+    push    ebp
+    mov     eax, [esp+20]       ; dst
+    mov     ebx, [esp+24]       ; dst_stride
+    mov     ecx, [esp+28]       ; src1
+    mov     edx, [esp+32]       ; src_stride
+    mov     edi, [esp+36]       ; src2
+    mov     esi, [esp+40]       ; height
+    sub     edi, ecx
+    lea     ebp, [edi+edx]
+.height_loop:
+%endmacro
 
 ;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w20_mmxext( uint8_t *dst,  int i_dst_stride,
-;                                 uint8_t *src1, int i_src1_stride,
-;                                 uint8_t *src2, int i_src2_stride,
-;                                 int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w20_mmxext
-    push    ebp
-    push    ebx
-    push    esi
-    push    edi
-
-    mov     edi, [esp+20]       ; dst
-    mov     ebx, [esp+28]       ; src1
-    mov     ecx, [esp+36]       ; src2
-    mov     esi, [esp+24]       ; i_dst_stride
-    mov     eax, [esp+32]       ; i_src1_stride
-    mov     edx, [esp+40]       ; i_src2_stride
-    mov     ebp, [esp+44]       ; i_height
-ALIGN 4
-.height_loop
-    movq    mm0, [ebx ]
-    movq    mm1, [ebx+8 ]
-    movd    mm2, [ebx+16]
-    pavgb   mm0, [ecx ]
-    pavgb   mm1, [ecx+8 ]
-    pavgb   mm2, [ecx+16]
-    movq    [edi ], mm0
-    movq    [edi+8 ], mm1
-    movd    [edi+16], mm2
-    dec     ebp
-    lea     ebx, [ebx+eax]
-    lea     ecx, [ecx+edx]
-    lea     edi, [edi+esi]
-    jg      .height_loop
-
-    pop     edi
-    pop     esi
-    pop     ebx
-    pop     ebp
+%macro AVG2_END 0
+    sub     esi, 2
+    lea     eax, [eax+ebx*2]
+    lea     ecx, [ecx+edx*2]
+    jg      .height_loop
+    pop     ebp
+    pop     edi
+    pop     esi
+    pop     ebx
     ret
+%endmacro
 
+AVG2_START x264_pixel_avg2_w4_mmxext
+    movd    mm0, [ecx]
+    movd    mm1, [ecx+edx]
+    pavgb   mm0, [ecx+edi]
+    pavgb   mm1, [ecx+ebp]
+    movd    [eax], mm0
+    movd    [eax+ebx], mm1
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w8_mmxext
+    movq    mm0, [ecx]
+    movq    mm1, [ecx+edx]
+    pavgb   mm0, [ecx+edi]
+    pavgb   mm1, [ecx+ebp]
+    movq    [eax], mm0
+    movq    [eax+ebx], mm1
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w16_mmxext
+    movq    mm0, [ecx]
+    movq    mm1, [ecx+8]
+    movq    mm2, [ecx+edx]
+    movq    mm3, [ecx+edx+8]
+    pavgb   mm0, [ecx+edi]
+    pavgb   mm1, [ecx+edi+8]
+    pavgb   mm2, [ecx+ebp]
+    pavgb   mm3, [ecx+ebp+8]
+    movq    [eax], mm0
+    movq    [eax+8], mm1
+    movq    [eax+ebx], mm2
+    movq    [eax+ebx+8], mm3
+AVG2_END
+
+AVG2_START x264_pixel_avg2_w20_mmxext
+    movq    mm0, [ecx]
+    movq    mm1, [ecx+8]
+    movd    mm2, [ecx+16]
+    movq    mm3, [ecx+edx]
+    movq    mm4, [ecx+edx+8]
+    movd    mm5, [ecx+edx+16]
+    pavgb   mm0, [ecx+edi]
+    pavgb   mm1, [ecx+edi+8]
+    pavgb   mm2, [ecx+edi+16]
+    pavgb   mm3, [ecx+ebp]
+    pavgb   mm4, [ecx+ebp+8]
+    pavgb   mm5, [ecx+ebp+16]
+    movq    [eax], mm0
+    movq    [eax+8], mm1
+    movd    [eax+16], mm2
+    movq    [eax+ebx], mm3
+    movq    [eax+ebx+8], mm4
+    movd    [eax+ebx+16], mm5
+AVG2_END
 
-ALIGN 16
-;-----------------------------------------------------------------------------
-; void x264_pixel_avg_w16_sse2( uint8_t *dst,  int i_dst_stride,
-;                               uint8_t *src1, int i_src1_stride,
-;                               uint8_t *src2, int i_src2_stride,
-;                               int i_height );
-;-----------------------------------------------------------------------------
-cglobal x264_pixel_avg_w16_sse2
-    push    ebp
-    push    ebx
-    push    esi
-    push    edi
-
-    mov     edi, [esp+20]       ; dst
-    mov     ebx, [esp+28]       ; src1
-    mov     ecx, [esp+36]       ; src2
-    mov     esi, [esp+24]       ; i_dst_stride
-    mov     eax, [esp+32]       ; i_src1_stride
-    mov     edx, [esp+40]       ; i_src2_stride
-    mov     ebp, [esp+44]       ; i_height
-ALIGN 4
-.height_loop
-    movdqu  xmm0, [ebx]
-    pavgb   xmm0, [ecx]
-    movdqu  [edi], xmm0
-
-    dec     ebp
-    lea     ebx, [ebx+eax]
-    lea     ecx, [ecx+edx]
-    lea     edi, [edi+esi]
-    jg      .height_loop
-
-    pop     edi
-    pop     esi
-    pop     ebx
-    pop     ebp
-    ret
-
 ;=============================================================================
 ; weighted prediction
diff --git a/common/i386/mc-c.c b/common/i386/mc-c.c
index bbe15dc6..e60c15f3 100644
--- a/common/i386/mc-c.c
+++ b/common/i386/mc-c.c
@@ -28,11 +28,18 @@
 #include "common/common.h"
 
 /* NASM functions */
-extern void x264_pixel_avg_w4_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w8_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w16_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w20_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
-extern void x264_pixel_avg_w16_sse2( uint8_t *, int, uint8_t *, int, uint8_t *, int, int );
+extern void x264_pixel_avg_16x16_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_16x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x16_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_8x4_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x8_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x4_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg_4x2_mmxext( uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w4_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w8_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w16_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
+extern void x264_pixel_avg2_w20_mmxext( uint8_t *, int, uint8_t *, int, uint8_t *, int );
 extern void x264_pixel_avg_weight_4x4_mmxext( uint8_t *, int, uint8_t *, int, int );
 extern void x264_pixel_avg_weight_w8_mmxext( uint8_t *, int, uint8_t *, int, int, int );
 extern void x264_pixel_avg_weight_w16_mmxext( uint8_t *, int, uint8_t *, int, int, int );
@@ -46,20 +53,6 @@ extern void x264_prefetch_ref_mmxext( uint8_t *, int, int );
 extern void x264_hpel_filter_mmxext( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
                                      int i_stride, int i_width, int i_height );
 
-#define AVG(W,H) \
-static void x264_pixel_avg_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src ) \
-{ \
-    x264_pixel_avg_w ## W ## _mmxext( dst, i_dst, dst, i_dst, src, i_src, H ); \
-}
-AVG(16,16)
-AVG(16,8)
-AVG(8,16)
-AVG(8,8)
-AVG(8,4)
-AVG(4,8)
-AVG(4,4)
-AVG(4,2)
-
 #define AVG_WEIGHT(W,H) \
 void x264_pixel_avg_weight_ ## W ## x ## H ## _mmxext( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int i_weight_dst ) \
 { \
@@ -71,14 +64,14 @@ AVG_WEIGHT(8,16)
 AVG_WEIGHT(8,8)
 AVG_WEIGHT(8,4)
 
-static void (* const x264_pixel_avg_wtab_mmxext[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int, int ) =
+static void (* const x264_pixel_avg_wtab_mmxext[6])( uint8_t *, int, uint8_t *, int, uint8_t *, int ) =
 {
     NULL,
-    x264_pixel_avg_w4_mmxext,
-    x264_pixel_avg_w8_mmxext,
-    x264_pixel_avg_w16_mmxext,
-    x264_pixel_avg_w16_mmxext,
-    x264_pixel_avg_w20_mmxext,
+    x264_pixel_avg2_w4_mmxext,
+    x264_pixel_avg2_w8_mmxext,
+    x264_pixel_avg2_w16_mmxext,
+    x264_pixel_avg2_w16_mmxext,
+    x264_pixel_avg2_w20_mmxext,
 };
 static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int, int ) =
 {
@@ -105,7 +98,7 @@ void mc_luma_mmx( uint8_t *src[4], int i_src_stride,
         uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_mmxext[i_width>>2](
                 dst, i_dst_stride, src1, i_src_stride,
-                src2, i_src_stride, i_height );
+                src2, i_height );
     }
     else
     {
@@ -128,7 +121,7 @@ uint8_t *get_ref_mmx( uint8_t *src[4], int i_src_stride,
         uint8_t *src2 = src[hpel_ref1[qpel_idx]] + offset + ((mvx&3) == 3);
         x264_pixel_avg_wtab_mmxext[i_width>>2](
                 dst, *i_dst_stride, src1, i_src_stride,
-                src2, i_src_stride, i_height );
+                src2, i_height );
         return dst;
     }
     else
@@ -141,8 +134,8 @@ uint8_t *get_ref_mmx( uint8_t *src[4], int i_src_stride,
 
 void x264_mc_mmxext_init( x264_mc_functions_t *pf )
 {
-    pf->mc_luma = mc_luma_mmx;
-    pf->get_ref = get_ref_mmx;
+    pf->mc_luma   = mc_luma_mmx;
+    pf->get_ref   = get_ref_mmx;
 
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_mmxext;
@@ -174,6 +167,4 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf )
 void x264_mc_sse2_init( x264_mc_functions_t *pf )
 {
     /* todo: use sse2 */
-    pf->mc_luma = mc_luma_mmx;
-    pf->get_ref = get_ref_mmx;
 }
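
Editor's note: the following is a hedged C sketch of the two semantics this commit separates; the names avg_inplace and avg2 and the plain-C form are illustrative only, not code from the patch. After the split, x264_pixel_avg_WxH_mmxext averages src into dst in place, which is all bipred needs because the first prediction has already been written to dst. x264_pixel_avg2_wN_mmxext averages two sources into dst; since mc only ever blends two halfpel planes of the same reference, both sources share one stride and the old second stride argument disappears.

/* pavgb rounds to nearest, i.e. (a+b+1)>>1 */
#include <stdint.h>

/* bipred case (x264_pixel_avg_WxH_*): dst = avg( dst, src ) */
static void avg_inplace( uint8_t *dst, int i_dst,
                         uint8_t *src, int i_src,
                         int width, int height )
{
    int x, y;
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < width; x++ )
            dst[x] = ( dst[x] + src[x] + 1 ) >> 1;
        dst += i_dst;
        src += i_src;
    }
}

/* mc case (x264_pixel_avg2_wN_*): dst = avg( src1, src2 ), with
 * src1 and src2 advancing by the same stride */
static void avg2( uint8_t *dst, int i_dst,
                  uint8_t *src1, int i_src,
                  uint8_t *src2,
                  int width, int height )
{
    int x, y;
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < width; x++ )
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        dst += i_dst;
        src1 += i_src;
        src2 += i_src;
    }
}

Splitting width from height this way is also what lets the AVGH macros reduce every avg_WxH function to a jump into avg_wW with the height in a register, and lets x264_pixel_avg_wtab_mmxext index the avg2 functions by width alone while height travels as the last argument.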