From 470e1b284f31e294119c7bc457a762488b34dd60 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Tue, 24 May 2005 04:16:54 +0000 Subject: [PATCH] UMHex motion search (but no early termination yet) git-svn-id: svn://svn.videolan.org/x264/trunk@238 df754926-b1dd-0310-bc7b-ec298dee348c --- encoder/encoder.c | 11 ++- encoder/me.c | 174 ++++++++++++++++++++++++++++++------------ encoder/ratecontrol.c | 4 + x264.c | 17 +++-- x264.h | 5 +- 5 files changed, 147 insertions(+), 64 deletions(-) diff --git a/encoder/encoder.c b/encoder/encoder.c index 37b94d79..5fb5fd52 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -374,13 +374,12 @@ static int x264_validate_parameters( x264_t *h ) h->param.i_cabac_init_idc = x264_clip3( h->param.i_cabac_init_idc, -1, 2 ); - if( h->param.analyse.i_me_method != X264_ME_DIA && - h->param.analyse.i_me_method != X264_ME_HEX && - h->param.analyse.i_me_method != X264_ME_ESA ) + if( h->param.analyse.i_me_method < X264_ME_DIA || + h->param.analyse.i_me_method > X264_ME_ESA ) h->param.analyse.i_me_method = X264_ME_HEX; - if( h->param.analyse.i_me_range < 2 ) - h->param.analyse.i_me_range = 2; - if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method != X264_ME_ESA ) + if( h->param.analyse.i_me_range < 4 ) + h->param.analyse.i_me_range = 4; + if( h->param.analyse.i_me_range > 16 && h->param.analyse.i_me_method <= X264_ME_HEX ) h->param.analyse.i_me_range = 16; h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 1, 5 ); if( !(h->param.analyse.inter & X264_ANALYSE_PSUB16x16) ) diff --git a/encoder/me.c b/encoder/me.c index fdfa5a37..901f3ec0 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -63,95 +63,173 @@ void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int const unsigned int i_me_range = h->param.analyse.i_me_range; const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8; int bmx, bmy, bcost; - int omx, omy; + int omx, omy, pmx, pmy; uint8_t *p_fref = m->p_fref[0]; 
- int i_iter; + int i, j; - const int mv_x_min = h->mb.mv_min_fpel[0]; - const int mv_y_min = h->mb.mv_min_fpel[1]; - const int mv_x_max = h->mb.mv_max_fpel[0]; - const int mv_y_max = h->mb.mv_max_fpel[1]; + int mv_x_min = h->mb.mv_min_fpel[0]; + int mv_y_min = h->mb.mv_min_fpel[1]; + int mv_x_max = h->mb.mv_max_fpel[0]; + int mv_y_max = h->mb.mv_max_fpel[1]; const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0]; const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; + if( h->mb.i_me_method == X264_ME_UMH ) + { + /* clamp mvp to inside frame+padding, so that we don't have to check it each iteration */ + p_cost_mvx = m->p_cost_mv - x264_clip3( m->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); + p_cost_mvy = m->p_cost_mv - x264_clip3( m->mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] ); + } - /* init with mvp */ - /* XXX: We don't need to clamp because the way diamond work, we will - * never go outside padded picture, and predict mv won't compute vector - * with componant magnitude greater. - * XXX: if some vector can go outside, (accelerator, ....) 
you need to clip - * them yourself */ - bmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max ); - bmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max ); + bmx = pmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max ); + bmy = pmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max ); bcost = COST_MAX; COST_MV( bmx, bmy ); /* I don't know why this helps */ bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ]; /* try extra predictors if provided */ - for( i_iter = 0; i_iter < i_mvc; i_iter++ ) + for( i = 0; i < i_mvc; i++ ) { - const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, mv_x_min, mv_x_max ); - const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, mv_y_min, mv_y_max ); + const int mx = x264_clip3( ( mvc[i][0] + 2 ) >> 2, mv_x_min, mv_x_max ); + const int my = x264_clip3( ( mvc[i][1] + 2 ) >> 2, mv_y_min, mv_y_max ); if( mx != bmx || my != bmy ) COST_MV( mx, my ); } COST_MV( 0, 0 ); + mv_x_max += 8; + mv_y_max += 8; + mv_x_min -= 8; + mv_y_min -= 8; + switch( h->mb.i_me_method ) { case X264_ME_DIA: - /* diamond search */ - for( i_iter = 0; i_iter < i_me_range; i_iter++ ) + /* diamond search, radius 1 */ +#define DIA1_ITER(mx, my)\ + {\ + omx = mx;\ + omy = my;\ + COST_MV( omx , omy-1 );\ + COST_MV( omx , omy+1 );\ + COST_MV( omx-1, omy );\ + COST_MV( omx+1, omy );\ + } + + for( i = 0; i < i_me_range; i++ ) { - omx = bmx; - omy = bmy; - COST_MV( omx , omy-1 ); - COST_MV( omx , omy+1 ); - COST_MV( omx-1, omy ); - COST_MV( omx+1, omy ); + DIA1_ITER( bmx, bmy ); if( bmx == omx && bmy == omy ) break; } break; + case X264_ME_HEX: - /* hexagon search */ - /* Don't need to test mv_range each time, we won't go outside picture+padding */ - omx = bmx; - omy = bmy; - for( i_iter = 0; i_iter < i_me_range/2; i_iter++ ) - { - COST_MV( omx-2, omy ); - COST_MV( omx-1, omy+2 ); - COST_MV( omx+1, omy+2 ); - COST_MV( omx+2, omy ); - COST_MV( omx+1, omy-2 ); - COST_MV( omx-1, omy-2 ); + /* hexagon search, radius 2 */ +#define HEX2_ITER(mx, my)\ + 
{\ + omx = mx;\ + omy = my;\ + COST_MV( omx-2, omy );\ + COST_MV( omx-1, omy+2 );\ + COST_MV( omx+1, omy+2 );\ + COST_MV( omx+2, omy );\ + COST_MV( omx+1, omy-2 );\ + COST_MV( omx-1, omy-2 );\ + } + for( i = 0; i < i_me_range/2; i++ ) + { + HEX2_ITER( bmx, bmy ); if( bmx == omx && bmy == omy ) break; - omx = bmx; - omy = bmy; } - /* square refine */ + DIA1_ITER( bmx, bmy ); COST_MV( omx-1, omy-1 ); - COST_MV( omx-1, omy ); COST_MV( omx-1, omy+1 ); - COST_MV( omx , omy-1 ); - COST_MV( omx , omy+1 ); COST_MV( omx+1, omy-1 ); - COST_MV( omx+1, omy ); COST_MV( omx+1, omy+1 ); break; + + case X264_ME_UMH: + /* Uneven-cross Multi-Hexagon-grid Search + * as in JM, except without early termination */ + + DIA1_ITER( pmx, pmy ); + if( pmx || pmy ) + DIA1_ITER( 0, 0 ); + DIA1_ITER( bmx, bmy ); + + if(i_pixel == PIXEL_4x4) + goto umh_small_hex; + + /* cross */ + omx = bmx; omy = bmy; + for( i = 1; i < i_me_range; i+=2 ) + { + if( omx + i <= mv_x_max ) + COST_MV( omx + i, omy ); + if( omx - i >= mv_x_min ) + COST_MV( omx - i, omy ); + } + for( i = 1; i < i_me_range/2; i+=2 ) + { + if( omy + i <= mv_y_max ) + COST_MV( omx, omy + i ); + if( omy - i >= mv_y_min ) + COST_MV( omx, omy - i ); + } + + /* 5x5 ESA */ + omx = bmx; omy = bmy; + for( i = 0; i < 24; i++ ) + { + static const int square2_x[24] = {1,1,0,-1,-1,-1, 0, 1, 2,2,2,2,1,0,-1,-2,-2,-2,-2,-2,-1, 0, 1, 2}; + static const int square2_y[24] = {0,1,1, 1, 0,-1,-1,-1,-1,0,1,2,2,2, 2, 2, 1, 0,-1,-2,-2,-2,-2,-2}; + COST_MV( omx + square2_x[i], omy + square2_y[i] ); + } + /* hexagon grid */ + omx = bmx; omy = bmy; + for( i = 1; i <= i_me_range/4; i++ ) + { + int bounds_check = 4*i > X264_MIN4( mv_x_max-omx, mv_y_max-omy, omx-mv_x_min, omy-mv_y_min ); + for( j = 0; j < 16; j++ ) + { + static const int hex4_x[16] = {0,-2,-4,-4,-4,-4,-4,-2, 0, 2, 4, 4,4,4,4,2}; + static const int hex4_y[16] = {4, 3, 2, 1, 0,-1,-2,-3,-4,-3,-2,-1,0,1,2,3}; + int mx = omx + hex4_x[j]*i; + int my = omy + hex4_y[j]*i; + if( !bounds_check || ( mx >= 
mv_x_min && mx <= mv_x_max + && my >= mv_y_min && my <= mv_y_max ) ) + COST_MV( mx, my ); + } + } +umh_small_hex: + /* iterative search */ + for( i = 0; i < i_me_range; i++ ) + { + HEX2_ITER( bmx, bmy ); + if( bmx == omx && bmy == omy ) + break; + } + for( i = 0; i < i_me_range; i++ ) + { + DIA1_ITER( bmx, bmy ); + if( bmx == omx && bmy == omy ) + break; + } + break; + case X264_ME_ESA: { - const int min_x = X264_MAX( bmx - i_me_range, mv_x_min-8); - const int min_y = X264_MAX( bmy - i_me_range, mv_y_min-8); - const int max_x = X264_MIN( bmx + i_me_range, mv_x_max+8); - const int max_y = X264_MIN( bmy + i_me_range, mv_y_max+8); + const int min_x = X264_MAX( bmx - i_me_range, mv_x_min); + const int min_y = X264_MAX( bmy - i_me_range, mv_y_min); + const int max_x = X264_MIN( bmx + i_me_range, mv_x_max); + const int max_y = X264_MIN( bmy + i_me_range, mv_y_max); for( omy = min_y; omy <= max_y; omy++ ) for( omx = min_x; omx <= max_x; omx++ ) { diff --git a/encoder/ratecontrol.c b/encoder/ratecontrol.c index f79e925c..571bce09 100644 --- a/encoder/ratecontrol.c +++ b/encoder/ratecontrol.c @@ -218,6 +218,10 @@ int x264_ratecontrol_new( x264_t *h ) } else if( h->param.rc.i_vbv_max_bitrate || h->param.rc.i_vbv_buffer_size ) x264_log(h, X264_LOG_ERROR, "VBV maxrate or buffer size specified, but not both.\n"); + if(rc->rate_tolerance < 0.01) { + x264_log(h, X264_LOG_ERROR, "bitrate tolerance too small, using .01\n"); + rc->rate_tolerance = 0.01; + } if( rc->b_abr ) { diff --git a/x264.c b/x264.c index 702ee98d..738d8db5 100644 --- a/x264.c +++ b/x264.c @@ -223,7 +223,8 @@ static void Help( x264_param_t *defaults ) " --me Integer pixel motion estimation method [\"%s\"]\n" " - dia: diamond search, radius 1 (fast)\n" " - hex: hexagonal search, radius 2\n" - " - esa: exhaustive search algorithm (slow)\n" + " - umh: uneven multi-hexagon search\n" + " - esa: exhaustive search (slow)\n" " --merange Maximum motion vector search range [%d]\n" " -m, --subme Subpixel motion estimation 
quality: 1=fast, 5=best. [%d]\n" " --no-chroma-me Ignore chroma in motion estimation\n" @@ -282,6 +283,7 @@ static void Help( x264_param_t *defaults ) defaults->rc.f_qblur, defaults->analyse.i_me_method==X264_ME_DIA ? "dia" : defaults->analyse.i_me_method==X264_ME_HEX ? "hex" + : defaults->analyse.i_me_method==X264_ME_UMH ? "umh" : defaults->analyse.i_me_method==X264_ME_ESA ? "esa" : NULL, defaults->analyse.i_me_range, defaults->analyse.i_subpel_refine @@ -566,13 +568,12 @@ static int Parse( int argc, char **argv, param->analyse.b_weighted_bipred = 1; break; case OPT_ME: - if( strstr( optarg, "dia" ) ) - param->analyse.i_me_method = X264_ME_DIA; - else if( strstr( optarg, "hex" ) ) - param->analyse.i_me_method = X264_ME_HEX; - else if( strstr( optarg, "esa" ) ) - param->analyse.i_me_method = X264_ME_ESA; - else + param->analyse.i_me_method = + strstr( optarg, "dia" ) ? X264_ME_DIA : + strstr( optarg, "hex" ) ? X264_ME_HEX : + strstr( optarg, "umh" ) ? X264_ME_UMH : + strstr( optarg, "esa" ) ? X264_ME_ESA : -1; + if( param->analyse.i_me_method == -1 ) { fprintf( stderr, "bad ME method `%s'\n", optarg ); return -1; diff --git a/x264.h b/x264.h index 790e583e..c934bd1c 100644 --- a/x264.h +++ b/x264.h @@ -26,7 +26,7 @@ #include -#define X264_BUILD 26 +#define X264_BUILD 27 /* x264_t: * opaque handler for decoder and encoder */ @@ -56,7 +56,8 @@ typedef struct x264_t x264_t; #define X264_DIRECT_PRED_TEMPORAL 2 #define X264_ME_DIA 0 #define X264_ME_HEX 1 -#define X264_ME_ESA 2 +#define X264_ME_UMH 2 +#define X264_ME_ESA 3 /* Colorspace type */ -- 2.40.0