From: James Almer Date: Wed, 9 Apr 2014 06:33:05 +0000 (-0300) Subject: x86: XOP pixel_ssd_nv12_core X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0d989a4ff3298f9e495be452880b5f9bfb441e93;p=libx264 x86: XOP pixel_ssd_nv12_core --- diff --git a/common/pixel.c b/common/pixel.c index fc275ea8..78356c9d 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -1021,6 +1021,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) } if( cpu&X264_CPU_XOP ) { + pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_xop; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_xop; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_xop; pixf->vsad = x264_pixel_vsad_xop; @@ -1313,6 +1314,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) pixf->sa8d[PIXEL_16x16]= x264_pixel_sa8d_16x16_xop; pixf->sa8d[PIXEL_8x8] = x264_pixel_sa8d_8x8_xop; pixf->intra_satd_x3_8x16c = x264_intra_satd_x3_8x16c_xop; + pixf->ssd_nv12_core = x264_pixel_ssd_nv12_core_xop; pixf->var[PIXEL_16x16] = x264_pixel_var_16x16_xop; pixf->var[PIXEL_8x16] = x264_pixel_var_8x16_xop; pixf->var[PIXEL_8x8] = x264_pixel_var_8x8_xop; diff --git a/common/x86/pixel-a.asm b/common/x86/pixel-a.asm index 9791e866..85e7d5a4 100644 --- a/common/x86/pixel-a.asm +++ b/common/x86/pixel-a.asm @@ -561,10 +561,15 @@ cglobal pixel_ssd_nv12_core, 6,7,7 pshufhw m0, m0, q3120 pshufhw m1, m1, q3120 %endif +%if cpuflag(xop) + pmadcswd m2, m0, m0, m2 + pmadcswd m3, m1, m1, m3 +%else pmaddwd m0, m0 pmaddwd m1, m1 paddd m2, m0 paddd m3, m1 +%endif add r6, 2*mmsize jl .loopx %if mmsize == 32 ; avx2 may overread by 32 bytes, that has to be handled @@ -657,10 +662,15 @@ cglobal pixel_ssd_nv12_core, 6,7 por m0, m1 psrlw m2, m0, 8 pand m0, m5 +%if cpuflag(xop) + pmadcswd m4, m2, m2, m4 + pmadcswd m3, m0, m0, m3 +%else pmaddwd m2, m2 pmaddwd m0, m0 - paddd m3, m0 paddd m4, m2 + paddd m3, m0 +%endif add r6, mmsize jl .loopx %if mmsize == 32 ; avx2 may overread by 16 bytes, that has to be handled @@ -695,6 +705,8 @@ INIT_XMM sse2 SSD_NV12 INIT_XMM avx SSD_NV12 +INIT_XMM xop +SSD_NV12 INIT_YMM avx2 SSD_NV12 diff --git a/common/x86/pixel.h b/common/x86/pixel.h index 362bc30c..0ff68bab 100644 --- a/common/x86/pixel.h +++ b/common/x86/pixel.h @@ -153,6 +153,9 @@ void x264_pixel_ssd_nv12_core_sse2( pixel *pixuv1, intptr_t stride1, void x264_pixel_ssd_nv12_core_avx ( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v ); +void x264_pixel_ssd_nv12_core_xop ( pixel *pixuv1, intptr_t stride1, + pixel *pixuv2, intptr_t stride2, int width, + int height, uint64_t *ssd_u, uint64_t *ssd_v ); void x264_pixel_ssd_nv12_core_avx2( pixel *pixuv1, intptr_t stride1, pixel *pixuv2, intptr_t stride2, int width, int height, uint64_t *ssd_u, uint64_t *ssd_v );