From: Loren Merritt Date: Wed, 18 Jan 2006 07:42:29 +0000 (+0000) Subject: early termination in UMH search X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d125e4373da6f5e50d2e4ab73aab2d97732212c5;p=libx264 early termination in UMH search git-svn-id: svn://svn.videolan.org/x264/trunk@406 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/encoder/me.c b/encoder/me.c index 535bf87c..16831f3d 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -83,11 +83,53 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite COST_MV( omx+1, omy );\ } +#define DIA2 \ + {\ + COST_MV( omx , omy-2 );\ + COST_MV( omx-1, omy-1 );/* 1 */\ + COST_MV( omx+1, omy-1 );/* 1 1 */\ + COST_MV( omx-2, omy );/* 1 0 1 */\ + COST_MV( omx+2, omy );/* 1 1 */\ + COST_MV( omx-1, omy+1 );/* 1 */\ + COST_MV( omx+1, omy+1 );\ + COST_MV( omx , omy+2 );\ + }\ + +#define OCT2 \ + {\ + COST_MV( omx-1, omy-2 );\ + COST_MV( omx+1, omy-2 );/* 1 1 */\ + COST_MV( omx-2, omy-1 );/* 1 1 */\ + COST_MV( omx+2, omy-1 );/* 0 */\ + COST_MV( omx-2, omy+1 );/* 1 1 */\ + COST_MV( omx+2, omy+1 );/* 1 1 */\ + COST_MV( omx-1, omy+2 );\ + COST_MV( omx+1, omy+2 );\ + } + +#define CROSS( start, x_max, y_max ) \ + { \ + for( i = start; i < x_max; i+=2 ) \ + { \ + if( omx + i <= mv_x_max ) \ + COST_MV( omx + i, omy ); \ + if( omx - i >= mv_x_min ) \ + COST_MV( omx - i, omy ); \ + } \ + for( i = start; i < y_max; i+=2 ) \ + { \ + if( omy + i <= mv_y_max ) \ + COST_MV( omx, omy + i ); \ + if( omy - i >= mv_y_min ) \ + COST_MV( omx, omy - i ); \ + } \ + } + void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_halfpel_thresh ) { const int i_pixel = m->i_pixel; - const int i_me_range = h->param.analyse.i_me_range; + int i_me_range = h->param.analyse.i_me_range; int bmx, bmy, bcost; int omx, omy, pmx, pmy; uint8_t *p_fref = m->p_fref[0]; @@ -197,12 +239,15 @@ me_hex2: case X264_ME_UMH: { /* Uneven-cross Multi-Hexagon-grid Search - * as in JM, except without early termination */ + * as in JM, except with different early termination */ + + static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 }; - int ucost; - int cross_start; + int ucost1, ucost2; + int cross_start = 1; /* refine predictors */ + ucost1 = bcost; DIA1_ITER( pmx, pmy ); if( pmx || pmy ) DIA1_ITER( 0, 0 ); @@ -210,31 +255,93 @@ me_hex2: if(i_pixel == PIXEL_4x4) goto me_hex2; - ucost = bcost; + ucost2 = bcost; if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) ) DIA1_ITER( bmx, bmy ); - - /* cross */ + if( bcost == ucost2 ) + cross_start = 3; omx = bmx; omy = bmy; - cross_start = ( bcost == ucost ) ? 3 : 1; - for( i = cross_start; i < i_me_range; i+=2 ) + + /* early termination */ +#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) ) + if( bcost == ucost2 && SAD_THRESH(2000) ) { - if( omx + i <= mv_x_max ) - COST_MV( omx + i, omy ); - if( omx - i >= mv_x_min ) - COST_MV( omx - i, omy ); + DIA2; + if( bcost == ucost1 && SAD_THRESH(500) ) + break; + if( bcost == ucost2 ) + { + int range = (i_me_range>>1) | 1; + CROSS( 3, range, range ); + OCT2; + if( bcost == ucost2 ) + break; + cross_start = range + 2; + } } - for( i = cross_start; i < i_me_range/2; i+=2 ) + + /* adaptive search range */ + if( i_mvc ) { - if( omy + i <= mv_y_max ) - COST_MV( omx, omy + i ); - if( omy - i >= mv_y_min ) - COST_MV( omx, omy - i ); + /* range multipliers based on casual inspection of some statistics of + * average distance between current predictor and final mv found by ESA. + * these have not been tuned much by actual encoding. */ + static const int range_mul[4][4] = + { + { 3, 3, 4, 4 }, + { 3, 4, 4, 4 }, + { 4, 4, 4, 5 }, + { 4, 4, 5, 6 }, + }; + int mvd; + int sad_ctx, mvd_ctx; + + if( i_mvc == 1 ) + { + if( i_pixel == PIXEL_16x16 ) + /* mvc is probably the same as mvp, so the difference isn't meaningful. + * but prediction usually isn't too bad, so just use medium range */ + mvd = 25; + else + mvd = abs( m->mvp[0] - mvc[0][0] ) + + abs( m->mvp[1] - mvc[0][1] ); + } + else + { + /* calculate the degree of agreement between predictors. */ + /* in 16x16, mvc includes all the neighbors used to make mvp, + * so don't count mvp separately. */ + int i_denom = i_mvc - 1; + mvd = 0; + if( i_pixel != PIXEL_16x16 ) + { + mvd = abs( m->mvp[0] - mvc[0][0] ) + + abs( m->mvp[1] - mvc[0][1] ); + i_denom++; + } + for( i = 0; i < i_mvc-1; i++ ) + mvd += abs( mvc[i][0] - mvc[i+1][0] ) + + abs( mvc[i][1] - mvc[i+1][1] ); + mvd /= i_denom; //FIXME idiv + } + + sad_ctx = SAD_THRESH(1000) ? 0 + : SAD_THRESH(2000) ? 1 + : SAD_THRESH(4000) ? 2 : 3; + mvd_ctx = mvd < 10 ? 0 + : mvd < 20 ? 1 + : mvd < 40 ? 2 : 3; + + i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4; } + /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy. + * we are still centered on the same place as the DIA2. is this desirable? */ + CROSS( cross_start, i_me_range, i_me_range/2 ); + /* 5x5 ESA */ omx = bmx; omy = bmy; - for( i = (bcost == ucost) ? 4 : 0; i < 24; i++ ) + for( i = (bcost == ucost2) ? 4 : 0; i < 24; i++ ) { static const int square2[24][2] = { { 1, 0}, { 0, 1}, {-1, 0}, { 0,-1},