/* for each of the 16 quarter-pel positions, the half-pel plane(s) in src[] to fetch (and average) from */
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
-void mc_luma_mmx( uint8_t *dst, int i_dst_stride,
- uint8_t *src[4], int i_src_stride,
- int mvx, int mvy,
- int i_width, int i_height )
+void mc_luma_mmxext( uint8_t *dst, int i_dst_stride,
+ uint8_t *src[4], int i_src_stride,
+ int mvx, int mvy,
+ int i_width, int i_height )
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
}
}
-uint8_t *get_ref_mmx( uint8_t *dst, int *i_dst_stride,
- uint8_t *src[4], int i_src_stride,
- int mvx, int mvy,
- int i_width, int i_height )
+uint8_t *get_ref_mmxext( uint8_t *dst, int *i_dst_stride,
+ uint8_t *src[4], int i_src_stride,
+ int mvx, int mvy,
+ int i_width, int i_height )
{
int qpel_idx = ((mvy&3)<<2) + (mvx&3);
int offset = (mvy>>2)*i_src_stride + (mvx>>2);
}
-void x264_mc_mmxext_init( x264_mc_functions_t *pf )
+void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
- pf->mc_luma = mc_luma_mmx;
- pf->get_ref = get_ref_mmx;
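+    /* single init entry point: test cpu flags in ascending order and return early when a level is missing */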
+ if( !(cpu&X264_CPU_MMX) )
+ return;
+
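+    /* the block copy routines need only plain MMX */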
+ pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
+ pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
+ pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
+
+ if( !(cpu&X264_CPU_MMXEXT) )
+ return;
+
+ pf->mc_luma = mc_luma_mmxext;
+ pf->get_ref = get_ref_mmxext;
pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext;
pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_mmxext;
pf->avg_weight[PIXEL_4x4] = x264_pixel_avg_weight_4x4_mmxext;
// avg_weight_4x8 is rare and 4x2 is not used
- pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
- pf->copy[PIXEL_8x8] = x264_mc_copy_w8_mmx;
- pf->copy[PIXEL_4x4] = x264_mc_copy_w4_mmx;
-
pf->plane_copy = x264_plane_copy_mmxext;
pf->hpel_filter = x264_hpel_filter_mmxext;
pf->prefetch_fenc = x264_prefetch_fenc_mmxext;
pf->prefetch_ref = x264_prefetch_ref_mmxext;
-}
-void x264_mc_sse2_init( x264_mc_functions_t *pf )
-{
+
/* todo: use sse2 */
}
#ifndef _I386_MC_H
#define _I386_MC_H 1
-void x264_mc_mmxext_init( x264_mc_functions_t *pf );
-void x264_mc_sse2_init( x264_mc_functions_t *pf );
+void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf );
void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride,
uint8_t *dst, int i_dst_stride,
PREDICT_16x16_P( mmxext )
PREDICT_16x16_P( sse2 )
-static void predict_8x8c_p( uint8_t *src )
+static void predict_8x8c_p_mmxext( uint8_t *src )
{
int a, b, c, i;
int H = 0;
PREDICT_16x16_DC( mmxext )
PREDICT_16x16_DC( sse2 )
-static void predict_8x8c_dc( uint8_t *src )
+static void predict_8x8c_dc_mmxext( uint8_t *src )
{
int s2 = 4
+ src[-1 + 0*FDEC_STRIDE]
/****************************************************************************
* Exported functions:
****************************************************************************/
-void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] )
+void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
{
- pf[I_PRED_16x16_V] = predict_16x16_v_mmx;
- pf[I_PRED_16x16_DC] = predict_16x16_dc_mmxext;
- pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext;
- pf[I_PRED_16x16_P] = predict_16x16_p_mmxext;
-
+ if( !(cpu&X264_CPU_MMX) )
+ return;
#ifdef ARCH_X86_64
pf[I_PRED_16x16_H] = predict_16x16_h;
pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left;
#endif
+ pf[I_PRED_16x16_V] = predict_16x16_v_mmx;
+ if( !(cpu&X264_CPU_MMXEXT) )
+ return;
+ pf[I_PRED_16x16_DC] = predict_16x16_dc_mmxext;
+ pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_mmxext;
+ pf[I_PRED_16x16_P] = predict_16x16_p_mmxext;
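+    /* skip the SSE2 versions on AMD processors (detected via the 3DNow! flag), where they are slower */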
+ if( !(cpu&X264_CPU_SSE2) || (cpu&X264_CPU_3DNOW) )
+ return;
+ pf[I_PRED_16x16_DC] = predict_16x16_dc_sse2;
+ pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2;
+ pf[I_PRED_16x16_V] = predict_16x16_v_sse2;
+ pf[I_PRED_16x16_P] = predict_16x16_p_sse2;
}
-void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] )
+void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
{
- pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx;
- pf[I_PRED_CHROMA_P] = predict_8x8c_p;
- pf[I_PRED_CHROMA_DC] = predict_8x8c_dc;
-
+ if( !(cpu&X264_CPU_MMX) )
+ return;
#ifdef ARCH_X86_64
pf[I_PRED_CHROMA_H] = predict_8x8c_h;
pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left;
pf[I_PRED_CHROMA_DC_TOP] = predict_8x8c_dc_top;
#endif
+ pf[I_PRED_CHROMA_V] = predict_8x8c_v_mmx;
+ if( !(cpu&X264_CPU_MMXEXT) )
+ return;
+ pf[I_PRED_CHROMA_P] = predict_8x8c_p_mmxext;
+ pf[I_PRED_CHROMA_DC] = predict_8x8c_dc_mmxext;
}
-void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] )
+void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12] )
{
+ if( !(cpu&X264_CPU_MMXEXT) )
+ return;
pf[I_PRED_8x8_V] = predict_8x8_v_mmxext;
pf[I_PRED_8x8_DC] = predict_8x8_dc_mmxext;
pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_mmxext;
#ifdef ARCH_X86
pf[I_PRED_8x8_DDR] = predict_8x8_ddr_mmxext;
#endif
-}
-
-void x264_predict_8x8_init_sse2( x264_predict8x8_t pf[12] )
-{
+ if( !(cpu&X264_CPU_SSE2) )
+ return;
pf[I_PRED_8x8_DDL] = predict_8x8_ddl_sse2;
pf[I_PRED_8x8_VL] = predict_8x8_vl_sse2;
pf[I_PRED_8x8_DDR] = predict_8x8_ddr_sse2;
}
-void x264_predict_4x4_init_mmxext( x264_predict_t pf[12] )
+void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
{
- pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
- pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext;
-#ifdef ARCH_X86_64 // slower on x86
+ if( !(cpu&X264_CPU_MMX) )
+ return;
+#ifdef ARCH_X86_64
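+    /* these versions pay off only on x86_64; they are slower than the defaults on 32-bit x86 */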
pf[I_PRED_4x4_DDR] = predict_4x4_ddr;
pf[I_PRED_4x4_VR] = predict_4x4_vr;
pf[I_PRED_4x4_HD] = predict_4x4_hd;
pf[I_PRED_4x4_HU] = predict_4x4_hu;
#endif
-}
-
-void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] )
-{
- pf[I_PRED_16x16_DC] = predict_16x16_dc_sse2;
- pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2;
- pf[I_PRED_16x16_V] = predict_16x16_v_sse2;
- pf[I_PRED_16x16_P] = predict_16x16_p_sse2;
+ if( !(cpu&X264_CPU_MMXEXT) )
+ return;
+ pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
+ pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext;
}
#ifndef _I386_PREDICT_H
#define _I386_PREDICT_H 1
-void x264_predict_16x16_init_mmxext ( x264_predict_t pf[7] );
-void x264_predict_8x8c_init_mmxext ( x264_predict_t pf[7] );
-void x264_predict_4x4_init_mmxext ( x264_predict_t pf[12] );
-void x264_predict_8x8_init_mmxext ( x264_predict8x8_t pf[12] );
-void x264_predict_8x8_init_sse2 ( x264_predict8x8_t pf[12] );
-void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] );
+void x264_predict_16x16_init_mmx ( int cpu, x264_predict_t pf[7] );
+void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] );
+void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] );
+void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12] );
#endif
pf->prefetch_ref = prefetch_ref_null;
#ifdef HAVE_MMX
- if( cpu&X264_CPU_MMXEXT ) {
- x264_mc_mmxext_init( pf );
+ x264_mc_init_mmx( cpu, pf );
+ if( cpu&X264_CPU_MMXEXT )
pf->mc_chroma = x264_mc_chroma_mmxext;
- }
- if( cpu&X264_CPU_SSE2 )
- x264_mc_sse2_init( pf );
#endif
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC )
pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128;
#ifdef HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- x264_predict_16x16_init_mmxext( pf );
- }
- // disable on AMD processors since it is slower
- if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
- {
- x264_predict_16x16_init_sse2( pf );
- }
+ x264_predict_16x16_init_mmx( cpu, pf );
#endif
#ifdef ARCH_PPC
pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128;
#ifdef HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- x264_predict_8x8c_init_mmxext( pf );
- }
+ x264_predict_8x8c_init_mmx( cpu, pf );
#endif
}
pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128;
#ifdef HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- x264_predict_8x8_init_mmxext( pf );
- }
- // disable on AMD processors since it is slower
- if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
- {
- x264_predict_8x8_init_sse2( pf );
- }
+ x264_predict_8x8_init_mmx( cpu, pf );
#endif
}
pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128;
#ifdef HAVE_MMX
- if( cpu&X264_CPU_MMXEXT )
- {
- x264_predict_4x4_init_mmxext( pf );
- }
+ x264_predict_4x4_init_mmx( cpu, pf );
#endif
}