From: Henrik Gramner Date: Fri, 5 Jul 2013 19:15:54 +0000 (+0200) Subject: x86: SSSE3 implementation of pixel_sad_x3 and pixel_sad_x4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e33aac9aba5c6b9c867b92f14c7722152680a61a;p=libx264 x86: SSSE3 implementation of pixel_sad_x3 and pixel_sad_x4 --- diff --git a/common/pixel.c b/common/pixel.c index 7a1e9550..b1cf7a3f 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -1195,9 +1195,8 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) } else { - pixf->sad_x4[PIXEL_8x4] = x264_pixel_sad_x4_8x4_ssse3; - pixf->sad_x4[PIXEL_8x8] = x264_pixel_sad_x4_8x8_ssse3; - pixf->sad_x4[PIXEL_8x16] = x264_pixel_sad_x4_8x16_ssse3; + INIT2( sad_x3, _ssse3 ); + INIT5( sad_x4, _ssse3 ); } if( (cpu&X264_CPU_SLOW_ATOM) || (cpu&X264_CPU_SLOW_SHUFFLE) ) { diff --git a/common/x86/sad-a.asm b/common/x86/sad-a.asm index 45f4a106..2270011b 100644 --- a/common/x86/sad-a.asm +++ b/common/x86/sad-a.asm @@ -1241,21 +1241,34 @@ SAD_X 4, 4, 4 %endmacro %macro SAD_X3_END_SSE2 0 + movifnidn r5, r5mp +%if cpuflag(ssse3) + packssdw m0, m1 + packssdw m2, m2 + phaddd m0, m2 + mova [r5], m0 +%else movhlps m3, m0 movhlps m4, m1 movhlps m5, m2 paddw m0, m3 paddw m1, m4 paddw m2, m5 - movifnidn r5, r5mp movd [r5+0], m0 movd [r5+4], m1 movd [r5+8], m2 +%endif RET %endmacro %macro SAD_X4_END_SSE2 0 mov r0, r6mp +%if cpuflag(ssse3) + packssdw m0, m1 + packssdw m2, m3 + phaddd m0, m2 + mova [r0], m0 +%else psllq m1, 32 psllq m3, 32 paddw m0, m1 @@ -1266,6 +1279,7 @@ SAD_X 4, 4, 4 paddw m2, m3 movq [r0+0], m0 movq [r0+8], m2 +%endif RET %endmacro @@ -1504,9 +1518,13 @@ cglobal pixel_sad_x%1_%2x%3, 2+%1,3+%1,8 %endmacro INIT_XMM ssse3 -SAD_X_SSSE3 4, 8, 16 -SAD_X_SSSE3 4, 8, 8 -SAD_X_SSSE3 4, 8, 4 +SAD_X_SSE2 3, 16, 16, 7 +SAD_X_SSE2 3, 16, 8, 7 +SAD_X_SSE2 4, 16, 16, 7 +SAD_X_SSE2 4, 16, 8, 7 +SAD_X_SSSE3 4, 8, 16 +SAD_X_SSSE3 4, 8, 8 +SAD_X_SSSE3 4, 8, 4 INIT_XMM avx SAD_X_SSE2 3, 16, 16, 6