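Select asm subroutines more intelligently in the intra x3 mbcmp wrapper functions: each CPU-specific wrapper now calls a per-variant predict function (asm where one is available) instead of always calling the C version.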
#if HAVE_MMX
# include "x86/pixel.h"
+# include "x86/predict.h"
#endif
#if ARCH_PPC
# include "ppc/pixel.h"
#endif
#endif // !HIGH_BIT_DEPTH
-#define INTRA_MBCMP_8x8( mbcmp, cpu )\
+#define INTRA_MBCMP_8x8( mbcmp, cpu, cpu2 )\
void x264_intra_##mbcmp##_x3_8x8##cpu( pixel *fenc, pixel edge[36], int res[3] )\
{\
ALIGNED_ARRAY_16( pixel, pix, [8*FDEC_STRIDE] );\
- x264_predict_8x8_v_c( pix, edge );\
+ x264_predict_8x8_v##cpu2( pix, edge );\
res[0] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
- x264_predict_8x8_h_c( pix, edge );\
+ x264_predict_8x8_h##cpu2( pix, edge );\
res[1] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
- x264_predict_8x8_dc_c( pix, edge );\
+ x264_predict_8x8_dc##cpu2( pix, edge );\
res[2] = x264_pixel_##mbcmp##_8x8##cpu( pix, FDEC_STRIDE, fenc, FENC_STRIDE );\
}
-INTRA_MBCMP_8x8( sad, )
-INTRA_MBCMP_8x8(sa8d, )
+INTRA_MBCMP_8x8( sad,, _c )
+INTRA_MBCMP_8x8(sa8d,, _c )
#if HIGH_BIT_DEPTH && HAVE_MMX
-INTRA_MBCMP_8x8( sad, _mmx2 )
-INTRA_MBCMP_8x8( sad, _sse2 )
-INTRA_MBCMP_8x8( sad, _ssse3 )
-INTRA_MBCMP_8x8(sa8d, _sse2 )
+INTRA_MBCMP_8x8( sad, _mmx2, _c )
+INTRA_MBCMP_8x8( sad, _sse2, _sse2 )
+INTRA_MBCMP_8x8( sad, _ssse3, _sse2 )
+INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 )
#endif
-#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu )\
-void x264_intra_##mbcmp##_x3_##size##x##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
+#define INTRA_MBCMP( mbcmp, size, pred1, pred2, pred3, chroma, cpu, cpu2 )\
+void x264_intra_##mbcmp##_x3_##size##chroma##cpu( pixel *fenc, pixel *fdec, int res[3] )\
{\
- x264_predict_##size##x##size##chroma##_##pred1##_c( fdec );\
- res[0] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
- x264_predict_##size##x##size##chroma##_##pred2##_c( fdec );\
- res[1] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
- x264_predict_##size##x##size##chroma##_##pred3##_c( fdec );\
- res[2] = x264_pixel_##mbcmp##_##size##x##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##chroma##_##pred1##cpu2( fdec );\
+ res[0] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##chroma##_##pred2##cpu2( fdec );\
+ res[1] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
+ x264_predict_##size##chroma##_##pred3##cpu2( fdec );\
+ res[2] = x264_pixel_##mbcmp##_##size##cpu( fdec, FDEC_STRIDE, fenc, FENC_STRIDE );\
}
-INTRA_MBCMP( sad, 4, v, h, dc, , )
-INTRA_MBCMP(satd, 4, v, h, dc, , )
-INTRA_MBCMP( sad, 8, dc, h, v, c, )
-INTRA_MBCMP(satd, 8, dc, h, v, c, )
-INTRA_MBCMP( sad, 16, v, h, dc, , )
-INTRA_MBCMP(satd, 16, v, h, dc, , )
+INTRA_MBCMP( sad, 4x4, v, h, dc, ,, _c )
+INTRA_MBCMP(satd, 4x4, v, h, dc, ,, _c )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c,, _c )
+INTRA_MBCMP(satd, 8x8, dc, h, v, c,, _c )
+INTRA_MBCMP( sad, 16x16, v, h, dc, ,, _c )
+INTRA_MBCMP(satd, 16x16, v, h, dc, ,, _c )
#if HIGH_BIT_DEPTH && HAVE_MMX
-INTRA_MBCMP( sad, 4, v, h, dc, , _mmx2 )
-INTRA_MBCMP(satd, 4, v, h, dc, , _mmx2 )
-INTRA_MBCMP( sad, 8, dc, h, v, c, _mmx2 )
-INTRA_MBCMP(satd, 8, dc, h, v, c, _mmx2 )
-INTRA_MBCMP( sad, 16, v, h, dc, , _mmx2 )
-INTRA_MBCMP(satd, 16, v, h, dc, , _mmx2 )
-INTRA_MBCMP( sad, 8, dc, h, v, c, _sse2 )
-INTRA_MBCMP( sad, 16, v, h, dc, , _sse2 )
-INTRA_MBCMP( sad, 4, v, h, dc, , _ssse3 )
-INTRA_MBCMP( sad, 8, dc, h, v, c, _ssse3 )
-INTRA_MBCMP( sad, 16, v, h, dc, , _ssse3 )
+INTRA_MBCMP( sad, 4x4, v, h, dc, , _mmx2, _c )
+INTRA_MBCMP(satd, 4x4, v, h, dc, , _mmx2, _c )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _mmx2, _c )
+INTRA_MBCMP(satd, 8x8, dc, h, v, c, _mmx2, _c )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _mmx2, _mmx2 )
+INTRA_MBCMP(satd, 16x16, v, h, dc, , _mmx2, _mmx2 )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _sse2, _sse2 )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _sse2, _sse2 )
+INTRA_MBCMP( sad, 4x4, v, h, dc, , _ssse3, _c )
+INTRA_MBCMP( sad, 8x8, dc, h, v, c, _ssse3, _sse2 )
+INTRA_MBCMP( sad, 16x16, v, h, dc, , _ssse3, _sse2 )
#endif
/****************************************************************************
;-----------------------------------------------------------------------------
%ifdef HIGH_BIT_DEPTH
INIT_MMX
-cglobal predict_16x16_v_mmx, 1,2
+cglobal predict_16x16_v_mmx2, 1,2
mova m0, [r0 - FDEC_STRIDEB+ 0]
mova m1, [r0 - FDEC_STRIDEB+ 8]
mova m2, [r0 - FDEC_STRIDEB+16]
REP_RET
%else ; !HIGH_BIT_DEPTH
INIT_MMX
-cglobal predict_16x16_v_mmx, 1,2
+cglobal predict_16x16_v_mmx2, 1,2
movq m0, [r0 - FDEC_STRIDE + 0]
movq m1, [r0 - FDEC_STRIDE + 8]
STORE16x16 m0, m1
#include "predict.h"
#include "pixel.h"
- void x264_predict_16x16_v_mmx( pixel *src );
- void x264_predict_16x16_v_sse2( pixel *src );
- void x264_predict_16x16_h_mmx2( pixel *src );
- void x264_predict_16x16_h_sse2( uint16_t *src );
- void x264_predict_16x16_h_ssse3( uint8_t *src );
- void x264_predict_16x16_dc_core_mmx2( pixel *src, int i_dc_left );
- void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left );
- void x264_predict_16x16_dc_left_core_mmx2( pixel *src, int i_dc_left );
- void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left );
- void x264_predict_16x16_dc_top_mmx2( pixel *src );
- void x264_predict_16x16_dc_top_sse2( pixel *src );
- void x264_predict_16x16_dc_top_ssse3( uint16_t *src );
- void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c );
- void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c );
- void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c );
- void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c );
- void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c );
- void x264_predict_8x8c_dc_mmx2( pixel *src );
- void x264_predict_8x8c_dc_sse2( uint16_t *src );
- void x264_predict_8x8c_dc_top_mmx2( uint8_t *src );
- void x264_predict_8x8c_dc_top_sse2( uint16_t *src );
- void x264_predict_8x8c_v_mmx( pixel *src );
- void x264_predict_8x8c_v_sse2( uint16_t *src );
- void x264_predict_8x8c_h_mmx2( uint8_t *src );
- void x264_predict_8x8c_h_sse2( pixel *src );
- void x264_predict_8x8c_h_ssse3( uint8_t *src );
- void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[36] );
- void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] );
- void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] );
- void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] );
- void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] );
- void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddl_ssse3_cache64( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddr_ssse3_cache64( pixel *src, pixel edge[36] );
- void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] );
- void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] );
- void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] );
- void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] );
- void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] );
- void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] );
- void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters );
- void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
- void x264_predict_4x4_ddl_mmx2( pixel *src );
- void x264_predict_4x4_ddl_sse2( uint16_t *src );
- void x264_predict_4x4_ddl_avx( uint16_t *src );
- void x264_predict_4x4_ddr_mmx2( pixel *src );
- void x264_predict_4x4_vl_mmx2( pixel *src );
- void x264_predict_4x4_vl_sse2( uint16_t *src );
- void x264_predict_4x4_vl_avx( uint16_t *src );
- void x264_predict_4x4_vr_mmx2( uint8_t *src );
- void x264_predict_4x4_vr_sse2( uint16_t *src );
- void x264_predict_4x4_vr_ssse3( pixel *src );
- void x264_predict_4x4_vr_ssse3_cache64( uint8_t *src );
- void x264_predict_4x4_vr_avx( uint16_t *src );
- void x264_predict_4x4_hd_mmx2( pixel *src );
- void x264_predict_4x4_hd_sse2( uint16_t *src );
- void x264_predict_4x4_hd_ssse3( pixel *src );
- void x264_predict_4x4_hd_avx( uint16_t *src );
- void x264_predict_4x4_dc_mmx2( pixel *src );
- void x264_predict_4x4_ddr_sse2( uint16_t *src );
- void x264_predict_4x4_ddr_ssse3( pixel *src );
- void x264_predict_4x4_ddr_avx( uint16_t *src );
- void x264_predict_4x4_hu_mmx2( pixel *src );
-
#define PREDICT_16x16_DC(name)\
-static void x264_predict_16x16_dc_##name( pixel *src )\
+void x264_predict_16x16_dc_##name( pixel *src )\
{\
uint32_t dc = 16;\
for( int i = 0; i < 16; i += 2 )\
****************************************************************************/
void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
{
- if( !(cpu&X264_CPU_MMX) )
+ if( !(cpu&X264_CPU_MMX2) )
return;
- pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx;
- if( cpu&X264_CPU_MMX2 )
- {
- pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmx2;
- pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmx2;
- pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
- pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmx2;
- }
+ pf[I_PRED_16x16_DC] = x264_predict_16x16_dc_mmx2;
+ pf[I_PRED_16x16_DC_TOP] = x264_predict_16x16_dc_top_mmx2;
+ pf[I_PRED_16x16_DC_LEFT] = x264_predict_16x16_dc_left_mmx2;
+ pf[I_PRED_16x16_V] = x264_predict_16x16_v_mmx2;
+ pf[I_PRED_16x16_H] = x264_predict_16x16_h_mmx2;
#if HIGH_BIT_DEPTH
if( !(cpu&X264_CPU_SSE2) )
return;
void x264_predict_8x8c_init_mmx ( int cpu, x264_predict_t pf[7] );
void x264_predict_4x4_init_mmx ( int cpu, x264_predict_t pf[12] );
void x264_predict_8x8_init_mmx ( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_filter_t *predict_8x8_filter );
+
+void x264_predict_16x16_v_mmx2( pixel *src );
+void x264_predict_16x16_v_sse2( pixel *src );
+void x264_predict_16x16_h_mmx2( pixel *src );
+void x264_predict_16x16_h_sse2( uint16_t *src );
+void x264_predict_16x16_h_ssse3( uint8_t *src );
+void x264_predict_16x16_dc_mmx2( pixel *src );
+void x264_predict_16x16_dc_sse2( pixel *src );
+void x264_predict_16x16_dc_core_mmx2( pixel *src, int i_dc_left );
+void x264_predict_16x16_dc_core_sse2( pixel *src, int i_dc_left );
+void x264_predict_16x16_dc_left_core_mmx2( pixel *src, int i_dc_left );
+void x264_predict_16x16_dc_left_core_sse2( pixel *src, int i_dc_left );
+void x264_predict_16x16_dc_top_mmx2( pixel *src );
+void x264_predict_16x16_dc_top_sse2( pixel *src );
+void x264_predict_16x16_dc_top_ssse3( uint16_t *src );
+void x264_predict_16x16_p_core_mmx2( uint8_t *src, int i00, int b, int c );
+void x264_predict_16x16_p_core_sse2( pixel *src, int i00, int b, int c );
+void x264_predict_16x16_p_core_avx( pixel *src, int i00, int b, int c );
+void x264_predict_8x8c_p_core_mmx2( uint8_t *src, int i00, int b, int c );
+void x264_predict_8x8c_p_core_sse2( pixel *src, int i00, int b, int c );
+void x264_predict_8x8c_dc_mmx2( pixel *src );
+void x264_predict_8x8c_dc_sse2( uint16_t *src );
+void x264_predict_8x8c_dc_top_mmx2( uint8_t *src );
+void x264_predict_8x8c_dc_top_sse2( uint16_t *src );
+void x264_predict_8x8c_v_mmx( pixel *src );
+void x264_predict_8x8c_v_sse2( uint16_t *src );
+void x264_predict_8x8c_h_mmx2( uint8_t *src );
+void x264_predict_8x8c_h_sse2( pixel *src );
+void x264_predict_8x8c_h_ssse3( uint8_t *src );
+void x264_predict_8x8_v_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_v_sse2( uint16_t *src, uint16_t edge[36] );
+void x264_predict_8x8_h_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_h_sse2( uint16_t *src, uint16_t edge[36] );
+void x264_predict_8x8_hd_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_hu_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_dc_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_dc_sse2( uint16_t *src, uint16_t edge[36] );
+void x264_predict_8x8_dc_top_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_dc_top_sse2( uint16_t *src, uint16_t edge[36] );
+void x264_predict_8x8_dc_left_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_dc_left_sse2( uint16_t *src, uint16_t edge[36] );
+void x264_predict_8x8_ddl_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddl_sse2( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddl_ssse3( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddl_ssse3_cache64( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddl_avx( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddr_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_ddr_sse2( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddr_ssse3( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddr_ssse3_cache64( pixel *src, pixel edge[36] );
+void x264_predict_8x8_ddr_avx( pixel *src, pixel edge[36] );
+void x264_predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vl_avx( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vr_mmx2( uint8_t *src, uint8_t edge[36] );
+void x264_predict_8x8_vr_sse2( pixel *src, pixel edge[36] );
+void x264_predict_8x8_vr_ssse3( pixel *src, pixel edge[36] );
+void x264_predict_8x8_vr_avx( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hu_sse2( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hu_ssse3( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hu_avx( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hd_sse2( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hd_ssse3( pixel *src, pixel edge[36] );
+void x264_predict_8x8_hd_avx( pixel *src, pixel edge[36] );
+void x264_predict_8x8_filter_mmx2( uint8_t *src, uint8_t edge[36], int i_neighbor, int i_filters );
+void x264_predict_8x8_filter_sse2( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
+void x264_predict_8x8_filter_ssse3( pixel *src, pixel edge[36], int i_neighbor, int i_filters );
+void x264_predict_8x8_filter_avx( uint16_t *src, uint16_t edge[36], int i_neighbor, int i_filters );
+void x264_predict_4x4_ddl_mmx2( pixel *src );
+void x264_predict_4x4_ddl_sse2( uint16_t *src );
+void x264_predict_4x4_ddl_avx( uint16_t *src );
+void x264_predict_4x4_ddr_mmx2( pixel *src );
+void x264_predict_4x4_vl_mmx2( pixel *src );
+void x264_predict_4x4_vl_sse2( uint16_t *src );
+void x264_predict_4x4_vl_avx( uint16_t *src );
+void x264_predict_4x4_vr_mmx2( uint8_t *src );
+void x264_predict_4x4_vr_sse2( uint16_t *src );
+void x264_predict_4x4_vr_ssse3( pixel *src );
+void x264_predict_4x4_vr_ssse3_cache64( uint8_t *src );
+void x264_predict_4x4_vr_avx( uint16_t *src );
+void x264_predict_4x4_hd_mmx2( pixel *src );
+void x264_predict_4x4_hd_sse2( uint16_t *src );
+void x264_predict_4x4_hd_ssse3( pixel *src );
+void x264_predict_4x4_hd_avx( uint16_t *src );
+void x264_predict_4x4_dc_mmx2( pixel *src );
+void x264_predict_4x4_ddr_sse2( uint16_t *src );
+void x264_predict_4x4_ddr_ssse3( pixel *src );
+void x264_predict_4x4_ddr_avx( uint16_t *src );
+void x264_predict_4x4_hu_mmx2( pixel *src );
+
#endif