From fa40b44f339501917e7a7c003ab826bf3e7b6a10 Mon Sep 17 00:00:00 2001 From: Henrik Gramner Date: Tue, 16 Apr 2013 23:27:08 +0200 Subject: [PATCH] x86: AVX2 high bit-depth predict_4x4_h --- common/x86/predict-a.asm | 10 ++++++++++ common/x86/predict-c.c | 3 +++ common/x86/predict.h | 1 + 3 files changed, 14 insertions(+) diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm index 15bb0755..a0ec5e58 100644 --- a/common/x86/predict-a.asm +++ b/common/x86/predict-a.asm @@ -215,6 +215,16 @@ cextern pw_pixel_max %endif %endmacro +;----------------------------------------------------------------------------- +; void predict_4x4_h( pixel *src ) +;----------------------------------------------------------------------------- +%if HIGH_BIT_DEPTH +INIT_XMM avx2 +cglobal predict_4x4_h, 1,1 + PRED_H_4ROWS 4, 0 + RET +%endif + ;----------------------------------------------------------------------------- ; void predict_4x4_ddl( pixel *src ) ;----------------------------------------------------------------------------- diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c index 58b8660b..4960d83b 100644 --- a/common/x86/predict-c.c +++ b/common/x86/predict-c.c @@ -587,6 +587,9 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] ) pf[I_PRED_4x4_HD] = x264_predict_4x4_hd_avx; pf[I_PRED_4x4_VL] = x264_predict_4x4_vl_avx; pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_avx; + if( !(cpu&X264_CPU_AVX2) ) + return; + pf[I_PRED_4x4_H] = x264_predict_4x4_h_avx2; #else pf[I_PRED_4x4_VR] = x264_predict_4x4_vr_mmx2; if( !(cpu&X264_CPU_SSSE3) ) diff --git a/common/x86/predict.h b/common/x86/predict.h index 8f107211..2df09750 100644 --- a/common/x86/predict.h +++ b/common/x86/predict.h @@ -117,6 +117,7 @@ void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbo void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters ); void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); +void x264_predict_4x4_h_avx2( uint16_t *src ); void x264_predict_4x4_ddl_mmx2( pixel *src ); void x264_predict_4x4_ddl_sse2( uint16_t *src ); void x264_predict_4x4_ddl_avx( uint16_t *src ); -- 2.40.0