From: Loren Merritt Date: Mon, 22 Nov 2004 07:34:17 +0000 (+0000) Subject: multiple hypothesis mv prediction: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0f65f519a8539602e08fd87d9c21b0b3f34a80d8;p=libx264 multiple hypothesis mv prediction: 1-3% improved compression, and .5-1% faster git-svn-id: svn://svn.videolan.org/x264/trunk@63 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/core/common.h b/core/common.h index ee514254..73af25c3 100644 --- a/core/common.h +++ b/core/common.h @@ -269,7 +269,8 @@ struct x264_t int8_t *chroma_pred_mode; /* chroma_pred_mode. cabac only. for non intra I_PRED_CHROMA_DC(0) */ int16_t (*mv[2])[2]; /* mb mv. set to 0 for intra mb */ int16_t (*mvd[2])[2]; /* mb mv difference with predict. set to 0 if intra. cabac only */ - int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only */ + int8_t *ref[2]; /* mb ref. set to -1 if non used (intra or Lx only) */ + int16_t (*mvr[2][16])[2]; /* mb mv for each possible ref */ /* current value */ int i_type; diff --git a/core/macroblock.c b/core/macroblock.c index a4d843de..77982488 100644 --- a/core/macroblock.c +++ b/core/macroblock.c @@ -349,6 +349,41 @@ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] ) } } +void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc ) +{ + int16_t (*mvr)[2] = h->mb.mvr[i_list][i_ref]; + + int i = 0; + if( h->mb.i_mb_x > 0 ) + { + int i_mb_l = h->mb.i_mb_xy - 1; + mvc[i][0] = mvr[i_mb_l][0]; + mvc[i][1] = mvr[i_mb_l][1]; + i++; + } + if( h->mb.i_mb_y > 0 ) + { + int i_mb_t = h->mb.i_mb_xy - h->mb.i_mb_stride; + mvc[i][0] = mvr[i_mb_t][0]; + mvc[i][1] = mvr[i_mb_t][1]; + i++; + + if( h->mb.i_mb_x > 0 ) + { + mvc[i][0] = mvr[i_mb_t - 1][0]; + mvc[i][1] = mvr[i_mb_t - 1][1]; + i++; + } + if( h->mb.i_mb_x < h->mb.i_mb_stride - 1 ) + { + mvc[i][0] = mvr[i_mb_t + 1][0]; + mvc[i][1] = mvr[i_mb_t + 1][1]; + i++; + } + } + *i_mvc = i; +} + static inline void x264_mb_mc_0xywh( x264_t *h, int x, int y, int width, int height ) { const int i8 = x264_scan8[0]+x+8*y; @@ -532,7 +567,8 @@ void x264_mb_mc( x264_t *h ) void x264_macroblock_cache_init( x264_t *h ) { - int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; + int i, j; + int i_mb_count = h->sps->i_mb_width * h->sps->i_mb_height; h->mb.i_mb_stride = h->sps->i_mb_width; @@ -558,12 +594,20 @@ void x264_macroblock_cache_init( x264_t *h ) h->mb.mvd[1] = x264_malloc( 2*16 * i_mb_count * sizeof( int16_t ) ); } + for( i=0; i<2; i++ ) + for( j=0; j<16; j++ ) /* FIXME: alloc no more than param.i_frame_reference */ + h->mb.mvr[i][j] = x264_malloc( 2 * i_mb_count * sizeof( int16_t ) ); + /* init with not avaiable (for top right idx=7,15) */ memset( h->mb.cache.ref[0], -2, X264_SCAN8_SIZE * sizeof( int8_t ) ); memset( h->mb.cache.ref[1], -2, X264_SCAN8_SIZE * sizeof( int8_t ) ); } void x264_macroblock_cache_end( x264_t *h ) { + int i, j; + for( i=0; i<2; i++ ) + for( j=0; j<16; j++ ) + x264_free( h->mb.mvr[i][j] ); if( h->param.b_cabac ) { x264_free( h->mb.chroma_pred_mode ); diff --git a/core/macroblock.h b/core/macroblock.h index 5419f87e..1cc6f6aa 100644 --- a/core/macroblock.h +++ b/core/macroblock.h @@ -143,17 +143,22 @@ void x264_mb_dequant_4x4( int16_t dct[4][4], int i_qscale ); /* x264_mb_predict_mv_16x16: * set mvp with predicted mv for D_16x16 block - * h->mb. need only valid values from others block */ + * h->mb. need only valid values from other blocks */ void x264_mb_predict_mv_16x16( x264_t *h, int i_list, int i_ref, int mvp[2] ); /* x264_mb_predict_mv_pskip: * set mvp with predicted mv for P_SKIP - * h->mb. need only valid values from others block */ + * h->mb. need only valid values from other blocks */ void x264_mb_predict_mv_pskip( x264_t *h, int mv[2] ); /* x264_mb_predict_mv: * set mvp with predicted mv for all blocks except P_SKIP * h->mb. need valid ref/partition/sub of current block to be valid - * and valid mv/ref from others block . */ + * and valid mv/ref from other blocks . */ void x264_mb_predict_mv( x264_t *h, int i_list, int idx, int i_width, int mvp[2] ); +/* x264_mb_predict_mv_ref16x16: + * set mvc with D_16x16 prediction. + * uses all neighbors, even those that didn't end up using this ref. + * need only valid values from other blocks */ +void x264_mb_predict_mv_ref16x16( x264_t *h, int i_list, int i_ref, int mvc[4][2], int *i_mvc ); int x264_mb_predict_intra4x4_mode( x264_t *h, int idx ); diff --git a/encoder/analyse.c b/encoder/analyse.c index a44ea542..da79c64e 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -455,6 +455,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) { x264_me_t m; int i_ref; + int mvc[4][2], i_mvc; /* 16x16 Search on all ref frame */ m.i_pixel = PIXEL_16x16; @@ -462,9 +463,6 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) m.p_fenc = h->mb.pic.p_fenc[0]; m.i_stride= h->mb.pic.i_stride[0]; m.i_mv_range = a->i_mv_range; - m.b_mvc = 0; -// m.mvc[0] = 0; -// m.mvc[1] = 0; a->l0.me16x16.cost = INT_MAX; for( i_ref = 0; i_ref < h->i_ref0; i_ref++ ) @@ -472,7 +470,8 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) /* search with ref */ m.p_fref = h->mb.pic.p_fref[0][i_ref][0]; x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp ); - x264_me_search( h, &m ); + x264_mb_predict_mv_ref16x16( h, 0, i_ref, mvc, &i_mvc ); + x264_me_search( h, &m, mvc, i_mvc ); /* add ref cost */ m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref ); @@ -482,6 +481,10 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) a->l0.i_ref = i_ref; a->l0.me16x16 = m; } + + /* save mv for predicting neighbors */ + h->mb.mvr[0][i_ref][h->mb.i_mb_xy][0] = m.mv[0]; + h->mb.mvr[0][i_ref][h->mb.i_mb_xy][1] = m.mv[1]; } /* subtract ref cost, so we don't have to add it for the other P types */ @@ -495,12 +498,16 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) { uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0]; uint8_t *p_fenc = h->mb.pic.p_fenc[0]; - + int mvc[5][2], i_mvc; int i; /* XXX Needed for x264_mb_predict_mv */ h->mb.i_partition = D_8x8; + i_mvc = 1; + mvc[0][0] = a->l0.me16x16.mv[0]; + mvc[0][1] = a->l0.me16x16.mv[1]; + for( i = 0; i < 4; i++ ) { x264_me_t *m = &a->l0.me8x8[i]; @@ -515,21 +522,14 @@ static void x264_mb_analyse_inter_p8x8( x264_t *h, x264_mb_analysis_t *a ) m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - if( i == 0 ) - { - m->b_mvc = 1; - m->mvc[0] = a->l0.me16x16.mv[0]; - m->mvc[1] = a->l0.me16x16.mv[1]; - } - else - { - m->b_mvc = 0; - } - x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, mvc, i_mvc ); x264_macroblock_cache_mv( h, 2*x8, 2*y8, 2, 2, 0, m->mv[0], m->mv[1] ); + + mvc[i_mvc][0] = m->mv[0]; + mvc[i_mvc][1] = m->mv[1]; + i_mvc++; } a->l0.i_cost8x8 = a->l0.me8x8[0].cost + a->l0.me8x8[1].cost + @@ -540,7 +540,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a ) { uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0]; uint8_t *p_fenc = h->mb.pic.p_fenc[0]; - + int mvc[2][2]; int i; /* XXX Needed for x264_mb_predict_mv */ @@ -558,12 +558,13 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a ) m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - m->b_mvc = 1; - m->mvc[0] = a->l0.me8x8[2*i].mv[0]; - m->mvc[1] = a->l0.me8x8[2*i].mv[1]; + mvc[0][0] = a->l0.me8x8[2*i].mv[0]; + mvc[0][1] = a->l0.me8x8[2*i].mv[1]; + mvc[1][0] = a->l0.me8x8[2*i+1].mv[0]; + mvc[1][1] = a->l0.me8x8[2*i+1].mv[1]; x264_mb_predict_mv( h, 0, 8*i, 4, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, mvc, 2 ); x264_macroblock_cache_mv( h, 0, 2*i, 4, 2, 0, m->mv[0], m->mv[1] ); } @@ -575,7 +576,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a ) { uint8_t *p_fref = h->mb.pic.p_fref[0][a->l0.i_ref][0]; uint8_t *p_fenc = h->mb.pic.p_fenc[0]; - + int mvc[2][2]; int i; /* XXX Needed for x264_mb_predict_mv */ @@ -593,12 +594,13 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a ) m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - m->b_mvc = 1; - m->mvc[0] = a->l0.me8x8[i].mv[0]; - m->mvc[1] = a->l0.me8x8[i].mv[1]; + mvc[0][0] = a->l0.me8x8[i].mv[0]; + mvc[0][1] = a->l0.me8x8[i].mv[1]; + mvc[1][0] = a->l0.me8x8[i+2].mv[0]; + mvc[1][1] = a->l0.me8x8[i+2].mv[1]; x264_mb_predict_mv( h, 0, 4*i, 2, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, mvc, 2 ); x264_macroblock_cache_mv( h, 2*i, 0, 2, 4, 0, m->mv[0], m->mv[1] ); } @@ -621,6 +623,7 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8 const int idx = 4*i8x8 + i4x4; const int x4 = block_idx_x[idx]; const int y4 = block_idx_y[idx]; + const int i_mvc = (i4x4 == 0); x264_me_t *m = &a->l0.me4x4[i8x8][i4x4]; @@ -632,19 +635,8 @@ static void x264_mb_analyse_inter_p4x4( x264_t *h, x264_mb_analysis_t *a, int i8 m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - if( i4x4 == 0 ) - { - m->b_mvc = 1; - m->mvc[0] = a->l0.me8x8[i8x8].mv[0]; - m->mvc[1] = a->l0.me8x8[i8x8].mv[1]; - } - else - { - m->b_mvc = 0; - } - x264_mb_predict_mv( h, 0, idx, 1, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, &a->l0.me8x8[i8x8].mv, i_mvc ); x264_macroblock_cache_mv( h, x4, y4, 1, 1, 0, m->mv[0], m->mv[1] ); } @@ -670,6 +662,7 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8 const int idx = 4*i8x8 + 2*i8x4; const int x4 = block_idx_x[idx]; const int y4 = block_idx_y[idx]; + const int i_mvc = (i8x4 == 0); x264_me_t *m = &a->l0.me8x4[i8x8][i8x4]; @@ -681,19 +674,8 @@ static void x264_mb_analyse_inter_p8x4( x264_t *h, x264_mb_analysis_t *a, int i8 m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - if( i8x4 == 0 ) - { - m->b_mvc = 1; - m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0]; - m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1]; - } - else - { - m->b_mvc = 0; - } - x264_mb_predict_mv( h, 0, idx, 2, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc ); x264_macroblock_cache_mv( h, x4, y4, 2, 1, 0, m->mv[0], m->mv[1] ); } @@ -716,6 +698,7 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8 const int idx = 4*i8x8 + i4x8; const int x4 = block_idx_x[idx]; const int y4 = block_idx_y[idx]; + const int i_mvc = (i4x8 == 0); x264_me_t *m = &a->l0.me4x8[i8x8][i4x8]; @@ -727,19 +710,8 @@ static void x264_mb_analyse_inter_p4x8( x264_t *h, x264_mb_analysis_t *a, int i8 m->i_stride= h->mb.pic.i_stride[0]; m->i_mv_range = a->i_mv_range; - if( i4x8 == 0 ) - { - m->b_mvc = 1; - m->mvc[0] = a->l0.me4x4[i8x8][0].mv[0]; - m->mvc[1] = a->l0.me4x4[i8x8][0].mv[1]; - } - else - { - m->b_mvc = 0; - } - x264_mb_predict_mv( h, 0, idx, 1, m->mvp ); - x264_me_search( h, m ); + x264_me_search( h, m, &a->l0.me4x4[i8x8][0].mv, i_mvc ); x264_macroblock_cache_mv( h, x4, y4, 1, 2, 0, m->mv[0], m->mv[1] ); } @@ -760,7 +732,6 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) m.lm = a->i_lambda; m.p_fenc = h->mb.pic.p_fenc[0]; m.i_stride= h->mb.pic.i_stride[0]; - m.b_mvc = 0; m.i_mv_range = a->i_mv_range; /* ME for List 0 */ @@ -770,7 +741,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) /* search with ref */ m.p_fref = h->mb.pic.p_fref[0][i_ref][0]; x264_mb_predict_mv_16x16( h, 0, i_ref, m.mvp ); - x264_me_search( h, &m ); + x264_me_search( h, &m, NULL, 0 ); /* add ref cost */ m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l0_active - 1, i_ref ); @@ -789,7 +760,7 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) /* search with ref */ m.p_fref = h->mb.pic.p_fref[1][i_ref][0]; x264_mb_predict_mv_16x16( h, 1, i_ref, m.mvp ); - x264_me_search( h, &m ); + x264_me_search( h, &m, NULL, 0 ); /* add ref cost */ m.cost += m.lm * bs_size_te( h->sh.i_num_ref_idx_l1_active - 1, i_ref ); diff --git a/encoder/me.c b/encoder/me.c index 90131515..e105031c 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -42,7 +42,7 @@ const static int subpel_iterations[][4] = static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters ); -void x264_me_search( x264_t *h, x264_me_t *m ) +void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc ) { const int i_pixel = m->i_pixel; int bcost; @@ -66,19 +66,22 @@ void x264_me_search( x264_t *h, x264_me_t *m ) /* try a candidate if provided */ - if( m->b_mvc ) + for( i_iter = 0; i_iter < i_mvc; i_iter++ ) { - const int mx = x264_clip3( ( m->mvc[0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range ); - const int my = x264_clip3( ( m->mvc[1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range ); - uint8_t *p_fref2 = &m->p_fref[my*m->i_stride+mx]; - int cost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride, p_fref2, m->i_stride ) + - m->lm * ( bs_size_se( m->mvc[0] - m->mvp[0] ) + bs_size_se( m->mvc[1] - m->mvp[1] ) ); - if( cost < bcost ) + const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range ); + const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range ); + if( mx != bmx || my != bmy ) { - bmx = mx; - bmy = my; - bcost = cost; - p_fref = p_fref2; + uint8_t *p_fref2 = &m->p_fref[my*m->i_stride+mx]; + int cost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride, p_fref2, m->i_stride ) + + m->lm * ( bs_size_se( mx - m->mvp[0] ) + bs_size_se( my - m->mvp[1] ) ); + if( cost < bcost ) + { + bmx = mx; + bmy = my; + bcost = cost; + p_fref = p_fref2; + } } } diff --git a/encoder/me.h b/encoder/me.h index bc639a3e..7856243d 100644 --- a/encoder/me.h +++ b/encoder/me.h @@ -38,15 +38,12 @@ typedef struct int mvp[2]; - int b_mvc; - int mvc[2]; - /* output */ int cost; /* satd + lm * nbits */ int mv[2]; } x264_me_t; -void x264_me_search( x264_t *h, x264_me_t *m ); +void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc ); void x264_me_refine_qpel( x264_t *h, x264_me_t *m ); #endif