From: Fiona Glaser
Date: Fri, 5 Sep 2008 03:13:38 +0000 (-0700)
Subject: Predict 4x4_DC asm
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cc0c3d4d1e639512e2b9003a68597fdb6ce00d4f;p=libx264

Predict 4x4_DC asm
Also remove 5-year-old unnecessary #define that reduced speed unnecessarily under MSVC-compiled builds
---

diff --git a/common/predict.c b/common/predict.c
index e6880994..282ae807 100644
--- a/common/predict.c
+++ b/common/predict.c
@@ -27,9 +27,6 @@
 
 #include "common.h"
 
-#ifdef _MSC_VER
-#undef HAVE_MMX /* not finished now */
-#endif
 #ifdef HAVE_MMX
 # include "x86/predict.h"
 #endif
diff --git a/common/x86/predict-a.asm b/common/x86/predict-a.asm
index 708859c4..0f64ca21 100644
--- a/common/x86/predict-a.asm
+++ b/common/x86/predict-a.asm
@@ -145,6 +145,31 @@ cglobal predict_4x4_vl_mmxext, 1,1
 
     RET
 
+;-----------------------------------------------------------------------------
+; void predict_4x4_dc( uint8_t *src )
+;-----------------------------------------------------------------------------
+
+cglobal predict_4x4_dc_mmxext, 1,4
+    pxor mm7, mm7
+    movd mm0, [r0-FDEC_STRIDE]
+    psadbw mm0, mm7
+    movd r3d, mm0
+    movzx r1d, byte [r0-1]
+%assign n 1
+%rep 3
+    movzx r2d, byte [r0+FDEC_STRIDE*n-1]
+    add r1d, r2d
+%assign n n+1
+%endrep
+    lea r1d, [r1+r3+4]
+    shr r1d, 3
+    imul r1d, 0x01010101
+    mov [r0+FDEC_STRIDE*0], r1d
+    mov [r0+FDEC_STRIDE*1], r1d
+    mov [r0+FDEC_STRIDE*2], r1d
+    mov [r0+FDEC_STRIDE*3], r1d
+    RET
+
 ;-----------------------------------------------------------------------------
 ; void predict_8x8_v_mmxext( uint8_t *src, uint8_t *edge )
 ;-----------------------------------------------------------------------------
diff --git a/common/x86/predict-c.c b/common/x86/predict-c.c
index d70c25a5..34a98d6d 100644
--- a/common/x86/predict-c.c
+++ b/common/x86/predict-c.c
@@ -49,6 +49,7 @@ extern void predict_8x8_vl_sse2( uint8_t *src, uint8_t edge[33] );
 extern void predict_8x8_vr_core_mmxext( uint8_t *src, uint8_t edge[33] );
 extern void predict_4x4_ddl_mmxext( uint8_t *src );
 extern void predict_4x4_vl_mmxext( uint8_t *src );
+extern void predict_4x4_dc_mmxext( uint8_t *src );
 extern void predict_16x16_dc_top_sse2( uint8_t *src );
 extern void predict_16x16_dc_core_sse2( uint8_t *src, int i_dc_left );
 extern void predict_16x16_v_sse2( uint8_t *src );
@@ -555,4 +556,5 @@ void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
         return;
     pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
     pf[I_PRED_4x4_VL] = predict_4x4_vl_mmxext;
+    pf[I_PRED_4x4_DC] = predict_4x4_dc_mmxext;
 }