From: Fiona Glaser
Date: Thu, 12 Jun 2008 14:09:22 +0000 (-0600)
Subject: More tweaks to me.c
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5204112861581df847a4a892ea63b8a0d72f2e6c;p=libx264

More tweaks to me.c

Added inline MMX version of UMH's predictor difference test
Various cosmetics throughout me.c
Removed a C99-ism introduced in r878.
---

diff --git a/common/common.h b/common/common.h
index 84d6d7f2..572231ae 100644
--- a/common/common.h
+++ b/common/common.h
@@ -130,6 +130,17 @@ static inline void x264_median_mv( int16_t *dst, int16_t *a, int16_t *b, int16_t
     dst[1] = x264_median( a[1], b[1], c[1] );
 }
 
+static inline int x264_predictor_difference( int16_t (*mvc)[2], int i_mvc )
+{
+    int sum = 0, i;
+    for( i = 0; i < i_mvc-1; i++ )
+    {
+        sum += abs( mvc[i][0] - mvc[i+1][0] )
+             + abs( mvc[i][1] - mvc[i+1][1] );
+    }
+    return sum;
+}
+
 #ifdef HAVE_MMX
 #include "x86/util.h"
 #endif
diff --git a/common/x86/util.h b/common/x86/util.h
index 73f49046..4b149f28 100644
--- a/common/x86/util.h
+++ b/common/x86/util.h
@@ -39,6 +39,37 @@ static inline void x264_median_mv_mmxext( int16_t *dst, int16_t *a, int16_t *b,
         :"m"(*(uint32_t*)a), "m"(*(uint32_t*)b), "m"(*(uint32_t*)c)
     );
 }
+#define x264_predictor_difference x264_predictor_difference_mmxext
+static inline int x264_predictor_difference_mmxext( int16_t (*mvc)[2], int i_mvc )
+{
+    int sum = 0;
+    uint16_t output[4];
+    asm(
+        "pxor %%mm4, %%mm4 \n"
+        "test $1, %1 \n"
+        "jnz 3f \n"
+        "movd -8(%2,%1,4), %%mm0 \n"
+        "movd -4(%2,%1,4), %%mm3 \n"
+        "psubw %%mm3, %%mm0 \n"
+        "jmp 2f \n"
+        "3: \n"
+        "sub $1, %1 \n"
+        "1: \n"
+        "movq -8(%2,%1,4), %%mm0 \n"
+        "psubw -4(%2,%1,4), %%mm0 \n"
+        "2: \n"
+        "sub $2, %1 \n"
+        "pxor %%mm2, %%mm2 \n"
+        "psubw %%mm0, %%mm2 \n"
+        "pmaxsw %%mm2, %%mm0 \n"
+        "paddusw %%mm0, %%mm4 \n"
+        "jg 1b \n"
+        "movq %%mm4, %0 \n"
+        :"=m"(output), "+r"(i_mvc), "+r"(mvc)
+    );
+    sum += output[0] + output[1] + output[2] + output[3];
+    return sum;
+}
 #endif
 
 #endif
diff --git a/encoder/me.c b/encoder/me.c
index 276f5432..216c909a 100644
--- a/encoder/me.c
+++ b/encoder/me.c
@@ -186,8 +186,8 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
     /* try extra predictors if provided */
     if( h->mb.i_subpel_refine >= 3 )
     {
-        COST_MV_HPEL( bmx, bmy );
         uint32_t bmv = pack16to32_mask(bmx,bmy);
+        COST_MV_HPEL( bmx, bmy );
         do
         {
             if( *(uint32_t*)mvc[i] && (bmv - *(uint32_t*)mvc[i]) )
@@ -235,7 +235,7 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc,
             for( i = 0; i < i_me_range; i++ )
             {
                 DIA1_ITER( bmx, bmy );
-                if( bmx == omx && bmy == omy )
+                if( (bmx == omx) & (bmy == omy) )
                     break;
                 if( !CHECK_MVRANGE(bmx, bmy) )
                     break;
@@ -389,9 +389,7 @@ me_hex2:
                                + abs( m->mvp[1] - mvc[0][1] );
                         denom++;
                     }
-                    for( i = 0; i < i_mvc-1; i++ )
-                        mvd += abs( mvc[i][0] - mvc[i+1][0] )
-                             + abs( mvc[i][1] - mvc[i+1][1] );
+                    mvd += x264_predictor_difference( mvc, i_mvc );
                 }
 
                 sad_ctx = SAD_THRESH(1000) ? 0
@@ -689,13 +687,12 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
     int bcost = m->cost;
     int odir = -1, bdir;
 
-
     /* try the subpel component of the predicted mv */
     if( hpel_iters && h->mb.i_subpel_refine < 3 )
     {
         int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] );
         int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] );
-        if( mx != bmx || my != bmy )
+        if( (mx-bmx)|(my-bmy) )
             COST_MV_SAD( mx, my );
     }
 
@@ -715,7 +712,7 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite
         COPY2_IF_LT( bcost, costs[1] + p_cost_mvx[omx ] + p_cost_mvy[omy+2], bmy, omy+2 );
         COPY3_IF_LT( bcost, costs[2] + p_cost_mvx[omx-2] + p_cost_mvy[omy ], bmx, omx-2, bmy, omy );
         COPY3_IF_LT( bcost, costs[3] + p_cost_mvx[omx+2] + p_cost_mvy[omy ], bmx, omx+2, bmy, omy );
-        if( bmx == omx && bmy == omy )
+        if( (bmx == omx) & (bmy == omy) )
             break;
     }
 