From cc0c3d4d1e639512e2b9003a68597fdb6ce00d4f Mon Sep 17 00:00:00 2001
From: Fiona Glaser
Date: Thu, 4 Sep 2008 20:13:38 -0700
Subject: [PATCH] Predict 4x4_DC asm

Also remove 5-year-old unnecessary #define that reduced speed unnecessarily
under MSVC-compiled builds
---
 common/predict.c         |  3 ---
 common/x86/predict-a.asm | 25 +++++++++++++++++++++++++
 common/x86/predict-c.c   |  2 ++
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/common/predict.c b/common/predict.c
index e6880994..282ae807 100644
--- a/common/predict.c
+++ b/common/predict.c
@@ -27,9 +27,6 @@

 #include "common.h"

-#ifdef _MSC_VER
-#undef HAVE_MMX /* not finished now */
-#endif
 #ifdef HAVE_MMX
 #   include "x86/predict.h"
 #endif
diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
index 708859c4..0f64ca21 100644
--- a/common/x86/predict-a.asm
+++ b/common/x86/predict-a.asm
@@ -145,6 +145,31 @@ cglobal predict_4x4_vl_mmxext, 1,1

     RET

+;-----------------------------------------------------------------------------
+; void predict_4x4_dc_mmxext( uint8_t *src )
+;-----------------------------------------------------------------------------
+
+cglobal predict_4x4_dc_mmxext, 1,4
+    pxor   mm7, mm7                        ; mm7 = 0, the reference operand for psadbw
+    movd   mm0, [r0-FDEC_STRIDE]           ; 4 bytes one stride above src; movd zeroes the upper half of mm0
+    psadbw mm0, mm7                        ; SAD against zero = plain sum of those 4 bytes
+    movd   r3d, mm0                        ; r3d = sum of the top neighbours
+    movzx  r1d, byte [r0-1]                ; r1d = byte immediately left of row 0
+%assign n 1
+%rep 3
+    movzx  r2d, byte [r0+FDEC_STRIDE*n-1]  ; byte immediately left of row n (n = 1..3)
+    add    r1d, r2d                        ; accumulate the left-column sum
+%assign n n+1
+%endrep
+    lea    r1d, [r1+r3+4]                  ; left sum + top sum + 4 (rounding term for the /8 below)
+    shr    r1d, 3                          ; rounded mean of the 8 neighbours; at most 255, so it fits in a byte
+    imul   r1d, 0x01010101                 ; replicate that byte into all 4 bytes of r1d
+    mov   [r0+FDEC_STRIDE*0], r1d          ; write the 4-byte fill to each of the 4 rows
+    mov   [r0+FDEC_STRIDE*1], r1d
+    mov   [r0+FDEC_STRIDE*2], r1d
+    mov   [r0+FDEC_STRIDE*3], r1d
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void predict_8x8_v_mmxext( uint8_t *src, uint8_t *edge )
 ;-----------------------------------------------------------------------------
diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
index d70c25a5..34a98d6d 100644
--- a/common/x86/predict-c.c
+++ b/common/x86/predict-c.c
@@ -49,6 +49,7 @@ extern void predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
 extern void predict_8x8_vr_core_mmxext( uint8_t *src, uint8_t edge[33] );
 extern void predict_4x4_ddl_mmxext( uint8_t *src );
 extern void predict_4x4_vl_mmxext( uint8_t *src );
+extern void predict_4x4_dc_mmxext( uint8_t *src );
 extern void predict_16x16_dc_top_sse2( uint8_t *src );
 extern void predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
 extern void predict_16x16_v_sse2( uint8_t *src );
@@ -555,4 +556,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
         return;
     pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
     pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext;
+    pf[I_PRED_4x4_DC] = predict_4x4_dc_mmxext;
 }
--
2.40.0