From 315285741877f89c660b9cefc3114963e95cf56a Mon Sep 17 00:00:00 2001 From: Loren Merritt <pengvado@videolan.org> Date: Sun, 2 Mar 2008 02:26:00 +0000 Subject: [PATCH] cosmetics in dsp init git-svn-id: svn://svn.videolan.org/x264/trunk@743 df754926-b1dd-0310-bc7b-ec298dee348c --- common/i386/mc-c.c | 40 +++++++++++++----------- common/i386/mc.h | 3 +- common/i386/predict-c.c | 68 +++++++++++++++++++++++------------------ common/i386/predict.h | 10 +++--- common/mc.c | 7 ++--- common/predict.c | 30 +++--------------- 6 files changed, 71 insertions(+), 87 deletions(-) diff --git a/common/i386/mc-c.c b/common/i386/mc-c.c index 8ac3bf66..212a8b22 100644 --- a/common/i386/mc-c.c +++ b/common/i386/mc-c.c @@ -84,10 +84,10 @@ static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int, static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1}; static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; -void mc_luma_mmx( uint8_t *dst, int i_dst_stride, - uint8_t *src[4], int i_src_stride, - int mvx, int mvy, - int i_width, int i_height ) +void mc_luma_mmxext( uint8_t *dst, int i_dst_stride, + uint8_t *src[4], int i_src_stride, + int mvx, int mvy, + int i_width, int i_height ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -107,10 +107,10 @@ void mc_luma_mmx( uint8_t *dst, int i_dst_stride, } } -uint8_t *get_ref_mmx( uint8_t *dst, int *i_dst_stride, - uint8_t *src[4], int i_src_stride, - int mvx, int mvy, - int i_width, int i_height ) +uint8_t *get_ref_mmxext( uint8_t *dst, int *i_dst_stride, + uint8_t *src[4], int i_src_stride, + int mvx, int mvy, + int i_width, int i_height ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -132,10 +132,20 @@ uint8_t *get_ref_mmx( uint8_t *dst, int *i_dst_stride, } -void x264_mc_mmxext_init( x264_mc_functions_t *pf ) +void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) { - pf->mc_luma = mc_luma_mmx; - pf->get_ref = get_ref_mmx; + if( !(cpu&X264_CPU_MMX) ) + return; + + pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx; + pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx; + pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx; + + if( !(cpu&X264_CPU_MMXEXT) ) + return; + + pf->mc_luma = mc_luma_mmxext; + pf->get_ref = get_ref_mmxext; pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext; pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_mmxext; @@ -154,17 +164,11 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf ) pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext; // avg_weight_4x8 is rare and 4x2 is not used - pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx; - pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx; - pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx; - pf->plane_copy = x264_plane_copy_mmxext; pf->hpel_filter = x264_hpel_filter_mmxext; pf->prefetch_fenc = x264_prefetch_fenc_mmxext; pf->prefetch_ref = x264_prefetch_ref_mmxext; -} -void x264_mc_sse2_init( x264_mc_functions_t *pf ) -{ + /* todo: use sse2 */ } diff --git a/common/i386/mc.h b/common/i386/mc.h index bde31b0d..40f23596 100644 --- a/common/i386/mc.h +++ b/common/i386/mc.h @@ -24,8 +24,7 @@ #ifndef _I386_MC_H #define _I386_MC_H 1 -void x264_mc_mmxext_init( x264_mc_functions_t *pf ); -void x264_mc_sse2_init( x264_mc_functions_t *pf ); +void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ); void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, diff --git a/common/i386/predict-c.c b/common/i386/predict-c.c index 956a4c17..63f48307 100644 --- a/common/i386/predict-c.c +++ b/common/i386/predict-c.c @@ -72,7 +72,7 @@ static void predict_16x16_p_##name( uint8_t *src )\ PREDICT_16x16_P( mmxext ) PREDICT_16x16_P( sse2 ) -static void predict_8x8c_p( uint8_t *src ) +static void predict_8x8c_p_mmxext( uint8_t *src ) { int a, b, c, i; int H = 0; @@ -109,7 +109,7 @@ static void predict_16x16_dc_##name( uint8_t *src )\ PREDICT_16x16_DC( mmxext ) PREDICT_16x16_DC( sse2 ) -static void predict_8x8c_dc( uint8_t *src ) +static void predict_8x8c_dc_mmxext( uint8_t *src ) { int s2 = 4 + src[-1 + 0*FDEC_STRIDE] @@ -492,34 +492,48 @@ void x264_intra_sa8d_x3_8x8_mmxext( uint8_t *fenc, uint8_t edge[33], int res[3] /**************************************************************************** * Exported functions: ****************************************************************************/ -void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] ) +void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] ) { - pf[I_PRED_16x16_V] = predict_16x16_v_mmx; - pf[I_PRED_16x16_DC] = predict_16x16_dc_mmxext; - pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext; - pf[I_PRED_16x16_P] = predict_16x16_p_mmxext; - + if( !(cpu&X264_CPU_MMX) ) + return; #ifdef ARCH_X86_64 pf[I_PRED_16x16_H] = predict_16x16_h; pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left; #endif + pf[I_PRED_16x16_V] = predict_16x16_v_mmx; + if( !(cpu&X264_CPU_MMXEXT) ) + return; + pf[I_PRED_16x16_DC] = predict_16x16_dc_mmxext; + pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext; + pf[I_PRED_16x16_P] = predict_16x16_p_mmxext; + if( !(cpu&X264_CPU_SSE2) || (cpu&X264_CPU_3DNOW) ) + return; + pf[I_PRED_16x16_DC] = predict_16x16_dc_sse2; + pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2; + pf[I_PRED_16x16_V] = predict_16x16_v_sse2; + pf[I_PRED_16x16_P] = predict_16x16_p_sse2; } -void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] ) +void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] ) { - pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx; - pf[I_PRED_CHROMA_P] = predict_8x8c_p; - pf[I_PRED_CHROMA_DC] = predict_8x8c_dc; - + if( !(cpu&X264_CPU_MMX) ) + return; #ifdef ARCH_X86_64 pf[I_PRED_CHROMA_H] = predict_8x8c_h; pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left; pf[I_PRED_CHROMA_DC_TOP] = predict_8x8c_dc_top; #endif + pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx; + if( !(cpu&X264_CPU_MMXEXT) ) + return; + pf[I_PRED_CHROMA_P] = predict_8x8c_p_mmxext; + pf[I_PRED_CHROMA_DC] = predict_8x8c_dc_mmxext; } -void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] ) +void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12] ) { + if( !(cpu&X264_CPU_MMXEXT) ) + return; pf[I_PRED_8x8_V] = predict_8x8_v_mmxext; pf[I_PRED_8x8_DC] = predict_8x8_dc_mmxext; pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_mmxext; @@ -529,31 +543,25 @@ void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] ) #ifdef ARCH_X86 pf[I_PRED_8x8_DDR] = predict_8x8_ddr_mmxext; #endif -} - -void x264_predict_8x8_init_sse2( x264_predict8x8_t pf[12] ) -{ + if( !(cpu&X264_CPU_SSE2) ) + return; pf[I_PRED_8x8_DDL] = predict_8x8_ddl_sse2; pf[I_PRED_8x8_VL] = predict_8x8_vl_sse2; pf[I_PRED_8x8_DDR] = predict_8x8_ddr_sse2; } -void x264_predict_4x4_init_mmxext( x264_predict_t pf[12] ) +void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] ) { - pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext; - pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext; -#ifdef ARCH_X86_64 // slower on x86 + if( !(cpu&X264_CPU_MMX) ) + return; +#ifdef ARCH_X86_64 pf[I_PRED_4x4_DDR] = predict_4x4_ddr; pf[I_PRED_4x4_VR] = predict_4x4_vr; pf[I_PRED_4x4_HD] = predict_4x4_hd; pf[I_PRED_4x4_HU] = predict_4x4_hu; #endif -} - -void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] ) -{ - pf[I_PRED_16x16_DC] = predict_16x16_dc_sse2; - pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2; - pf[I_PRED_16x16_V] = predict_16x16_v_sse2; - pf[I_PRED_16x16_P] = predict_16x16_p_sse2; + if( !(cpu&X264_CPU_MMXEXT) ) + return; + pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext; + pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext; } diff --git a/common/i386/predict.h b/common/i386/predict.h index 49d892d0..4db2e91e 100644 --- a/common/i386/predict.h +++ b/common/i386/predict.h @@ -24,11 +24,9 @@ #ifndef _I386_PREDICT_H #define _I386_PREDICT_H 1 -void x264_predict_16x16_init_mmxext ( x264_predict_t pf[7] ); -void x264_predict_8x8c_init_mmxext ( x264_predict_t pf[7] ); -void x264_predict_4x4_init_mmxext ( x264_predict_t pf[12] ); -void x264_predict_8x8_init_mmxext ( x264_predict8x8_t pf[12] ); -void x264_predict_8x8_init_sse2 ( x264_predict8x8_t pf[12] ); -void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] ); +void x264_predict_16x16_init_mmx ( int cpu, x264_predict_t pf[7] ); +void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] ); +void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] ); +void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12] ); #endif diff --git a/common/mc.c b/common/mc.c index 6ce44e34..cc7032f0 100644 --- a/common/mc.c +++ b/common/mc.c @@ -375,12 +375,9 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf ) pf->prefetch_ref = prefetch_ref_null; #ifdef HAVE_MMX - if( cpu&X264_CPU_MMXEXT ) { - x264_mc_mmxext_init( pf ); + x264_mc_init_mmx( cpu, pf ); + if( cpu&X264_CPU_MMXEXT ) pf->mc_chroma = x264_mc_chroma_mmxext; - } - if( cpu&X264_CPU_SSE2 ) - x264_mc_sse2_init( pf ); #endif #ifdef ARCH_PPC if( cpu&X264_CPU_ALTIVEC ) diff --git a/common/predict.c b/common/predict.c index 3b44723f..57f96215 100644 --- a/common/predict.c +++ b/common/predict.c @@ -847,15 +847,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] ) pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128; #ifdef HAVE_MMX - if( cpu&X264_CPU_MMXEXT ) - { - x264_predict_16x16_init_mmxext( pf ); - } - // disable on AMD processors since it is slower - if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) ) - { - x264_predict_16x16_init_sse2( pf ); - } + x264_predict_16x16_init_mmx( cpu, pf ); #endif #ifdef ARCH_PPC @@ -877,10 +869,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] ) pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128; #ifdef HAVE_MMX - if( cpu&X264_CPU_MMXEXT ) - { - x264_predict_8x8c_init_mmxext( pf ); - } + x264_predict_8x8c_init_mmx( cpu, pf ); #endif } @@ -900,15 +889,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12] ) pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128; #ifdef HAVE_MMX - if( cpu&X264_CPU_MMXEXT ) - { - x264_predict_8x8_init_mmxext( pf ); - } - // disable on AMD processors since it is slower - if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) ) - { - x264_predict_8x8_init_sse2( pf ); - } + x264_predict_8x8_init_mmx( cpu, pf ); #endif } @@ -928,10 +909,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] ) pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128; #ifdef HAVE_MMX - if( cpu&X264_CPU_MMXEXT ) - { - x264_predict_4x4_init_mmxext( pf ); - } + x264_predict_4x4_init_mmx( cpu, pf ); #endif } -- 2.40.0