From: Loren Merritt Date: Mon, 15 Aug 2011 17:43:42 +0000 (+0000) Subject: Faster intra_mbcmp_x3 for versions without dedicated asm X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e184ff26233198932d9b77aa7feed6a49095f136;p=libx264 Faster intra_mbcmp_x3 for versions without dedicated asm Select asm subroutines more intelligently in the wrapper functions. --- diff --git a/common/pixel.c b/common/pixel.c index 7418121a..7da32153 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -29,6 +29,7 @@ #if HAVE_MMX # include "x86/pixel.h" +# include "x86/predict.h" #endif #if ARCH_PPC # include "ppc/pixel.h" @@ -498,57 +499,57 @@ SATD_X_DECL7( _neon ) #endif #endif // !HIGH_BIT_DEPTH -#define INTRA_MBCMP_8x8( mbcmp, cpu )\ +#define INTRA_MBCMP_8x8( mbcmp, cpu, cpu2 )\ void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\ {\ ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] );\ - x264_predict_8x8_v_c( pix, edge );\ + x264_predict_8x8_v##cpu2( pix, edge );\ res[0] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\ - x264_predict_8x8_h_c( pix, edge );\ + x264_predict_8x8_h##cpu2( pix, edge );\ res[1] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\ - x264_predict_8x8_dc_c( pix, edge );\ + x264_predict_8x8_dc##cpu2( pix, edge );\ res[2] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\ } -INTRA_MBCMP_8x8( sad, ) -INTRA_MBCMP_8x8(sa8d, ) +INTRA_MBCMP_8x8( sad,, _c ) +INTRA_MBCMP_8x8(sa8d,, _c ) #if HIGH_BIT_DEPTH && HAVE_MMX -INTRA_MBCMP_8x8( sad, _mmx2 ) -INTRA_MBCMP_8x8( sad, _sse2 ) -INTRA_MBCMP_8x8( sad, _ssse3 ) -INTRA_MBCMP_8x8(sa8d, _sse2 ) +INTRA_MBCMP_8x8( sad, _mmx2, _c ) +INTRA_MBCMP_8x8( sad, _sse2, _sse2 ) +INTRA_MBCMP_8x8( sad, _ssse3, _sse2 ) +INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) #endif -#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu )\ -void x264_intra_##mbcmp##_x3_##size##x##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\ +#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\ +void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\ {\ - x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\ - res[0] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ - x264_predict_##size##x##size##chroma##_##pred2##_c( fdec );\ - res[1] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ - x264_predict_##size##x##size##chroma##_##pred3##_c( fdec );\ - res[2] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ + x264_predict_##size##chroma##_##pred1##cpu2( fdec );\ + res[0] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ + x264_predict_##size##chroma##_##pred2##cpu2( fdec );\ + res[1] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ + x264_predict_##size##chroma##_##pred3##cpu2( fdec );\ + res[2] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\ } -INTRA_MBCMP( sad, 4, v, h, dc, , ) -INTRA_MBCMP(satd, 4, v, h, dc, , ) -INTRA_MBCMP( sad, 8, dc, h, v, c, ) -INTRA_MBCMP(satd, 8, dc, h, v, c, ) -INTRA_MBCMP( sad, 16, v, h, dc, , ) -INTRA_MBCMP(satd, 16, v, h, dc, , ) +INTRA_MBCMP( sad, 4x4, v, h, dc, ,, _c ) +INTRA_MBCMP(satd, 4x4, v, h, dc, ,, _c ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c,, _c ) +INTRA_MBCMP(satd, 8x8, dc, h, v, c,, _c ) +INTRA_MBCMP( sad, 16x16, v, h, dc, ,, _c ) +INTRA_MBCMP(satd, 16x16, v, h, dc, ,, _c ) #if HIGH_BIT_DEPTH && HAVE_MMX -INTRA_MBCMP( sad, 4, v, h, dc, , _mmx2 ) -INTRA_MBCMP(satd, 4, v, h, dc, , _mmx2 ) -INTRA_MBCMP( sad, 8, dc, h, v, c, _mmx2 ) -INTRA_MBCMP(satd, 8, dc, h, v, c, _mmx2 ) -INTRA_MBCMP( sad, 16, v, h, dc, , _mmx2 ) -INTRA_MBCMP(satd, 16, v, h, dc, , _mmx2 ) -INTRA_MBCMP( sad, 8, dc, h, v, c, _sse2 ) -INTRA_MBCMP( sad, 16, v, h, dc, , _sse2 ) -INTRA_MBCMP( sad, 4, v, h, dc, , _ssse3 ) -INTRA_MBCMP( sad, 8, dc, h, v, c, _ssse3 ) -INTRA_MBCMP( sad, 16, v, h, dc, , _ssse3 ) +INTRA_MBCMP( sad, 4x4, v, h, dc, , _mmx2, _c ) +INTRA_MBCMP(satd, 4x4, v, h, dc, , _mmx2, _c ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _c ) +INTRA_MBCMP(satd, 8x8, dc, h, v, c, _mmx2, _c ) +INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 ) +INTRA_MBCMP(satd, 16x16, v, h, dc, , _mmx2, _mmx2 ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 ) +INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 ) +INTRA_MBCMP( sad, 4x4, v, h, dc, , _ssse3, _c ) +INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 ) +INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 ) #endif /**************************************************************************** diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm index c22f296b..c505573e 100644 --- a/common/x86/predict-a.asm +++ b/common/x86/predict-a.asm @@ -1744,7 +1744,7 @@ cglobal predict_8x8c_dc_top_mmx2, 1,1 ;----------------------------------------------------------------------------- %ifdef HIGH_BIT_DEPTH INIT_MMX -cglobal predict_16x16_v_mmx, 1,2 +cglobal predict_16x16_v_mmx2, 1,2 mova m0, [r0 - FDEC_STRIDEB+ 0] mova m1, [r0 - FDEC_STRIDEB+ 8] mova m2, [r0 - FDEC_STRIDEB+16] @@ -1759,7 +1759,7 @@ cglobal predict_16x16_v_sse2, 2,2 REP_RET %else ; !HIGH_BIT_DEPTH INIT_MMX -cglobal predict_16x16_v_mmx, 1,2 +cglobal predict_16x16_v_mmx2, 1,2 movq m0, [r0 - FDEC_STRIDE + 0] movq m1, [r0 - FDEC_STRIDE + 8] STORE16x16 m0, m1 diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c index a4f75adc..d0b7d395 100644 --- a/common/x86/predict-c.c +++ b/common/x86/predict-c.c @@ -29,94 +29,8 @@ #include "predict.h" #include "pixel.h" - void x264_predict_16x16_v_mmx( pixel *src ); - void x264_predict_16x16_v_sse2( pixel *src ); - void x264_predict_16x16_h_mmx2( pixel *src ); - void x264_predict_16x16_h_sse2( uint16_t *src ); - void x264_predict_16x16_h_ssse3( uint8_t *src ); - void x264_predict_16x16_dc_core_mmx2( pixel *src, int i_dc_left ); - void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left ); - void x264_predict_16x16_dc_left_core_mmx2( pixel *src, int i_dc_left ); - void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left ); - void x264_predict_16x16_dc_top_mmx2( pixel *src ); - void x264_predict_16x16_dc_top_sse2( pixel *src ); - void x264_predict_16x16_dc_top_ssse3( uint16_t *src ); - void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c ); - void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c ); - void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c ); - void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c ); - void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c ); - void x264_predict_8x8c_dc_mmx2( pixel *src ); - void x264_predict_8x8c_dc_sse2( uint16_t *src ); - void x264_predict_8x8c_dc_top_mmx2( uint8_t *src ); - void x264_predict_8x8c_dc_top_sse2( uint16_t *src ); - void x264_predict_8x8c_v_mmx( pixel *src ); - void x264_predict_8x8c_v_sse2( uint16_t *src ); - void x264_predict_8x8c_h_mmx2( uint8_t *src ); - void x264_predict_8x8c_h_sse2( pixel *src ); - void x264_predict_8x8c_h_ssse3( uint8_t *src ); - void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[36] ); - void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] ); - void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] ); - void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] ); - void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] ); - void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddl_ssse3_cache64( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddr_ssse3_cache64( pixel *src, pixel edge[36] ); - void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] ); - void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] ); - void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] ); - void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] ); - void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] ); - void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] ); - void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters ); - void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); - void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters ); - void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); - void x264_predict_4x4_ddl_mmx2( pixel *src ); - void x264_predict_4x4_ddl_sse2( uint16_t *src ); - void x264_predict_4x4_ddl_avx( uint16_t *src ); - void x264_predict_4x4_ddr_mmx2( pixel *src ); - void x264_predict_4x4_vl_mmx2( pixel *src ); - void x264_predict_4x4_vl_sse2( uint16_t *src ); - void x264_predict_4x4_vl_avx( uint16_t *src ); - void x264_predict_4x4_vr_mmx2( uint8_t *src ); - void x264_predict_4x4_vr_sse2( uint16_t *src ); - void x264_predict_4x4_vr_ssse3( pixel *src ); - void x264_predict_4x4_vr_ssse3_cache64( uint8_t *src ); - void x264_predict_4x4_vr_avx( uint16_t *src ); - void x264_predict_4x4_hd_mmx2( pixel *src ); - void x264_predict_4x4_hd_sse2( uint16_t *src ); - void x264_predict_4x4_hd_ssse3( pixel *src ); - void x264_predict_4x4_hd_avx( uint16_t *src ); - void x264_predict_4x4_dc_mmx2( pixel *src ); - void x264_predict_4x4_ddr_sse2( uint16_t *src ); - void x264_predict_4x4_ddr_ssse3( pixel *src ); - void x264_predict_4x4_ddr_avx( uint16_t *src ); - void x264_predict_4x4_hu_mmx2( pixel *src ); - #define PREDICT_16x16_DC(name)\ -static void x264_predict_16x16_dc_##name( pixel *src )\ +void x264_predict_16x16_dc_##name( pixel *src )\ {\ uint32_t dc = 16;\ for( int i = 0; i < 16; i += 2 )\ @@ -362,16 +276,13 @@ static void x264_predict_8x8c_dc_left( uint8_t *src ) ****************************************************************************/ void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ) { - if( !(cpu&X264_CPU_MMX) ) + if( !(cpu&X264_CPU_MMX2) ) return; - pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx; - if( cpu&X264_CPU_MMX2 ) - { - pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmx2; - pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmx2; - pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2; - pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmx2; - } + pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmx2; + pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmx2; + pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2; + pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx2; + pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmx2; #if HIGH_BIT_DEPTH if( !(cpu&X264_CPU_SSE2) ) return; diff --git a/common/x86/predict.h b/common/x86/predict.h index eae39077..fba2f75f 100644 --- a/common/x86/predict.h +++ b/common/x86/predict.h @@ -31,4 +31,93 @@ void x264_predict_16x16_init_mmx ( int cpu, x264_predict_t pf[7] ); void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] ); void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] ); void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter ); + +void x264_predict_16x16_v_mmx2( pixel *src ); +void x264_predict_16x16_v_sse2( pixel *src ); +void x264_predict_16x16_h_mmx2( pixel *src ); +void x264_predict_16x16_h_sse2( uint16_t *src ); +void x264_predict_16x16_h_ssse3( uint8_t *src ); +void x264_predict_16x16_dc_mmx2( pixel *src ); +void x264_predict_16x16_dc_sse2( pixel *src ); +void x264_predict_16x16_dc_core_mmx2( pixel *src, int i_dc_left ); +void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left ); +void x264_predict_16x16_dc_left_core_mmx2( pixel *src, int i_dc_left ); +void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left ); +void x264_predict_16x16_dc_top_mmx2( pixel *src ); +void x264_predict_16x16_dc_top_sse2( pixel *src ); +void x264_predict_16x16_dc_top_ssse3( uint16_t *src ); +void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c ); +void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c ); +void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c ); +void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c ); +void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c ); +void x264_predict_8x8c_dc_mmx2( pixel *src ); +void x264_predict_8x8c_dc_sse2( uint16_t *src ); +void x264_predict_8x8c_dc_top_mmx2( uint8_t *src ); +void x264_predict_8x8c_dc_top_sse2( uint16_t *src ); +void x264_predict_8x8c_v_mmx( pixel *src ); +void x264_predict_8x8c_v_sse2( uint16_t *src ); +void x264_predict_8x8c_h_mmx2( uint8_t *src ); +void x264_predict_8x8c_h_sse2( pixel *src ); +void x264_predict_8x8c_h_ssse3( uint8_t *src ); +void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[36] ); +void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] ); +void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] ); +void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] ); +void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] ); +void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddl_ssse3_cache64( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddr_ssse3_cache64( pixel *src, pixel edge[36] ); +void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] ); +void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] ); +void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] ); +void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] ); +void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] ); +void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] ); +void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters ); +void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); +void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters ); +void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters ); +void x264_predict_4x4_ddl_mmx2( pixel *src ); +void x264_predict_4x4_ddl_sse2( uint16_t *src ); +void x264_predict_4x4_ddl_avx( uint16_t *src ); +void x264_predict_4x4_ddr_mmx2( pixel *src ); +void x264_predict_4x4_vl_mmx2( pixel *src ); +void x264_predict_4x4_vl_sse2( uint16_t *src ); +void x264_predict_4x4_vl_avx( uint16_t *src ); +void x264_predict_4x4_vr_mmx2( uint8_t *src ); +void x264_predict_4x4_vr_sse2( uint16_t *src ); +void x264_predict_4x4_vr_ssse3( pixel *src ); +void x264_predict_4x4_vr_ssse3_cache64( uint8_t *src ); +void x264_predict_4x4_vr_avx( uint16_t *src ); +void x264_predict_4x4_hd_mmx2( pixel *src ); +void x264_predict_4x4_hd_sse2( uint16_t *src ); +void x264_predict_4x4_hd_ssse3( pixel *src ); +void x264_predict_4x4_hd_avx( uint16_t *src ); +void x264_predict_4x4_dc_mmx2( pixel *src ); +void x264_predict_4x4_ddr_sse2( uint16_t *src ); +void x264_predict_4x4_ddr_ssse3( pixel *src ); +void x264_predict_4x4_ddr_avx( uint16_t *src ); +void x264_predict_4x4_hu_mmx2( pixel *src ); + #endif