From 42e179e84b8563eff62efcfbee0d947f09100fd4 Mon Sep 17 00:00:00 2001 From: Fiona Glaser Date: Fri, 19 Jun 2009 16:03:18 -0700 Subject: [PATCH] MMX CABAC mvd sum calculation Faster CABAC mvd coding. --- common/common.h | 9 +++++++++ common/x86/util.h | 29 +++++++++++++++++++++++++++++ encoder/cabac.c | 12 ++++++------ 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/common/common.h b/common/common.h index aa944f3a..1e46ae8e 100644 --- a/common/common.h +++ b/common/common.h @@ -143,6 +143,15 @@ static inline int x264_predictor_difference( int16_t (*mvc)[2], intptr_t i_mvc ) return sum; } +static inline uint32_t x264_cabac_amvd_sum( int16_t *mvdleft, int16_t *mvdtop ) +{ + int amvd0 = abs(mvdleft[0]) + abs(mvdtop[0]); + int amvd1 = abs(mvdleft[1]) + abs(mvdtop[1]); + amvd0 = (amvd0 > 2) + (amvd0 > 32); + amvd1 = (amvd1 > 2) + (amvd1 > 32); + return amvd0 + (amvd1<<16); /* component 0 in bits 0-15, component 1 in bits 16-31 */ +} + /**************************************************************************** * ****************************************************************************/ diff --git a/common/x86/util.h b/common/x86/util.h index b86f37aa..ac4ece65 100644 --- a/common/x86/util.h +++ b/common/x86/util.h @@ -107,6 +107,35 @@ static ALWAYS_INLINE int array_non_zero_int_mmx( void *v, int i_count ) } else return array_non_zero_int_c( v, i_count ); } +#define x264_cabac_amvd_sum x264_cabac_amvd_sum_mmxext +static ALWAYS_INLINE uint32_t x264_cabac_amvd_sum_mmxext(int16_t *mvdleft, int16_t *mvdtop) +{ + static const uint64_t pw_2 = 0x0002000200020002ULL; + static const uint64_t pw_28 = 0x001C001C001C001CULL; + static const uint64_t pw_2184 = 0x0888088808880888ULL; + /* MIN(((x+28)*2184)>>16,2) = (x>2) + (x>32), with x = abs(mvdleft[i]) + abs(mvdtop[i]) in each 16-bit lane */ + /* 2184 = fix16(1/30) = 65536/30 rounded down; pmulhuw supplies the >>16 by keeping the high word of the product */ + uint32_t amvd; + asm( + "movd %1, %%mm0 \n" + "movd %2, %%mm1 \n" + "pxor %%mm2, %%mm2 \n" + "pxor %%mm3, %%mm3 \n" + "psubw %%mm0, %%mm2 \n" + "psubw %%mm1, %%mm3 \n" + "pmaxsw %%mm2, %%mm0 \n" + "pmaxsw %%mm3, %%mm1 \n" /* mm0, mm1 = abs(left), abs(top), computed as max(v, 0-v) */ + "paddw %3, %%mm0 \n" + "paddw %%mm1, %%mm0 
\n" + "pmulhuw %4, %%mm0 \n" + "pminsw %5, %%mm0 \n" + "movd %%mm0, %0 \n" + :"=r"(amvd) + :"m"(*(uint32_t*)mvdleft),"m"(*(uint32_t*)mvdtop), + "m"(pw_28),"m"(pw_2184),"m"(pw_2) + ); + return amvd; +} #endif #endif diff --git a/encoder/cabac.c b/encoder/cabac.c index 1ffd28b2..9bc37148 100644 --- a/encoder/cabac.c +++ b/encoder/cabac.c @@ -390,14 +390,11 @@ static void x264_cabac_mb_ref( x264_t *h, x264_cabac_t *cb, int i_list, int idx x264_cabac_encode_decision( cb, 54 + ctx, 0 ); } -static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd ) +static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int l, int mvd, int ctx ) { static const uint8_t ctxes[9] = { 0,3,4,5,6,6,6,6,6 }; - const int amvd = abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 1][l] ) + - abs( h->mb.cache.mvd[i_list][x264_scan8[idx] - 8][l] ); const int i_abs = abs( mvd ); const int ctxbase = l ? 47 : 40; - int ctx = (amvd>2) + (amvd>32); int i; if( i_abs == 0 ) @@ -443,16 +440,19 @@ static inline void x264_cabac_mb_mvd_cpn( x264_t *h, x264_cabac_t *cb, int i_lis static NOINLINE uint32_t x264_cabac_mb_mvd( x264_t *h, x264_cabac_t *cb, int i_list, int idx, int width, int height ) { DECLARE_ALIGNED_4( int16_t mvp[2] ); + uint32_t amvd; int mdx, mdy; /* Calculate mvd */ x264_mb_predict_mv( h, i_list, idx, width, mvp ); mdx = h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0]; mdy = h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1]; + amvd = x264_cabac_amvd_sum(h->mb.cache.mvd[i_list][x264_scan8[idx] - 1], + h->mb.cache.mvd[i_list][x264_scan8[idx] - 8]); /* encode */ - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx ); - x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy ); + x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 0, mdx, amvd&0xFFFF ); + x264_cabac_mb_mvd_cpn( h, cb, i_list, idx, 1, mdy, amvd>>16 ); return pack16to32_mask(mdx,mdy); } -- 2.40.0