From 2fa8f84b6e108222735c2895b6419ed8c29ef031 Mon Sep 17 00:00:00 2001 From: Loren Merritt Date: Fri, 30 Dec 2005 04:56:49 +0000 Subject: [PATCH] joint bidirectional motion refinement (--bime) git-svn-id: svn://svn.videolan.org/x264/trunk@390 df754926-b1dd-0310-bc7b-ec298dee348c --- common/common.c | 5 ++- encoder/analyse.c | 32 ++++++++++++++ encoder/me.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++ encoder/me.h | 1 + x264.c | 8 +++- x264.h | 3 +- 6 files changed, 154 insertions(+), 4 deletions(-) diff --git a/common/common.c b/common/common.c index 73878ebe..2f7b73fd 100644 --- a/common/common.c +++ b/common/common.c @@ -462,9 +462,10 @@ char *x264_param2string( x264_param_t *p, int b_res ) s += sprintf( s, " bframes=%d", p->i_bframe ); if( p->i_bframe ) { - s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d", + s += sprintf( s, " b_pyramid=%d b_adapt=%d b_bias=%d direct=%d wpredb=%d bime=%d", p->b_bframe_pyramid, p->b_bframe_adaptive, p->i_bframe_bias, - p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred ); + p->analyse.i_direct_mv_pred, p->analyse.b_weighted_bipred, + p->analyse.b_bidir_me ); } s += sprintf( s, " keyint=%d keyint_min=%d scenecut=%d", diff --git a/encoder/analyse.c b/encoder/analyse.c index c0ab8f13..2381a2ed 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -1700,6 +1700,35 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) } } +static void refine_bidir( x264_t *h, x264_mb_analysis_t *a ) +{ + const int i_biweight = h->mb.bipred_weight[a->l0.i_ref][a->l1.i_ref]; + int i; + + switch( h->mb.i_partition ) + { + case D_16x16: + if( h->mb.i_type == B_BI_BI ) + x264_me_refine_bidir( h, &a->l0.me16x16, &a->l1.me16x16, i_biweight ); + break; + case D_16x8: + for( i=0; i<2; i++ ) + if( a->i_mb_partition16x8[i] == D_BI_8x8 ) + x264_me_refine_bidir( h, &a->l0.me16x8[i], &a->l1.me16x8[i], i_biweight ); + break; + case D_8x16: + for( i=0; i<2; i++ ) + if( a->i_mb_partition8x16[i] == D_BI_8x8 ) + x264_me_refine_bidir( h, &a->l0.me8x16[i], &a->l1.me8x16[i], i_biweight ); + break; + case D_8x8: + for( i=0; i<4; i++ ) + if( h->mb.i_sub_partition[i] == D_BI_8x8 ) + x264_me_refine_bidir( h, &a->l0.me8x8[i], &a->l1.me8x8[i], i_biweight ); + break; + } +} + static inline void x264_mb_analyse_transform( x264_t *h ) { h->mb.cache.b_transform_8x8_allowed = @@ -2230,6 +2259,9 @@ void x264_macroblock_analyse( x264_t *h ) } h->mb.i_type = i_type; + + if( h->param.analyse.b_bidir_me ) + refine_bidir( h, &analysis ); } } diff --git a/encoder/me.c b/encoder/me.c index f976d067..26ec3f47 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -451,3 +451,112 @@ static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_ite m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ]; } +#define BIME_CACHE( dx, dy ) \ +{ \ + int i = 4 + 3*dx + dy; \ + h->mc.mc_luma( m0->p_fref, m0->i_stride[0], pix0[i], bw, om0x+dx, om0y+dy, bw, bh ); \ + h->mc.mc_luma( m1->p_fref, m1->i_stride[0], pix1[i], bw, om1x+dx, om1y+dy, bw, bh ); \ +} + +#define BIME_CACHE2(a,b) \ + BIME_CACHE( a, b) \ + BIME_CACHE(-a,-b) + +#define COST_BIMV_SATD( m0x, m0y, m1x, m1y ) \ +if( pass == 0 || !visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] ) \ +{ \ + int cost; \ + int i0 = 4 + 3*(m0x-om0x) + (m0y-om0y); \ + int i1 = 4 + 3*(m1x-om1x) + (m1y-om1y); \ + visited[(m0x)&7][(m0y)&7][(m1x)&7][(m1y)&7] = 1; \ + memcpy( pix, pix0[i0], bs ); \ + if( i_weight == 32 ) \ + h->mc.avg[i_pixel]( pix, bw, pix1[i1], bw ); \ + else \ + h->mc.avg_weight[i_pixel]( pix, bw, pix1[i1], bw, i_weight ); \ + cost = h->pixf.mbcmp[i_pixel]( m0->p_fenc[0], m0->i_stride[0], pix, bw ) \ + + p_cost_m0x[ m0x ] + p_cost_m0y[ m0y ] \ + + p_cost_m1x[ m1x ] + p_cost_m1y[ m1y ]; \ + if( cost < bcost ) \ + { \ + bcost = cost; \ + bm0x = m0x; \ + bm0y = m0y; \ + bm1x = m1x; \ + bm1y = m1y; \ + } \ +} + +#define CHECK_BIDIR(a,b,c,d) \ + COST_BIMV_SATD(om0x+a, om0y+b, om1x+c, om1y+d) + +#define CHECK_BIDIR2(a,b,c,d) \ + CHECK_BIDIR( a ,b, c, d) \ + CHECK_BIDIR(-a,-b,-c,-d) + +#define CHECK_BIDIR8(a,b,c,d) \ + CHECK_BIDIR2(a,b,c,d) \ + CHECK_BIDIR2(b,c,d,a) \ + CHECK_BIDIR2(c,d,a,b) \ + CHECK_BIDIR2(d,a,b,c) + +int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ) +{ + const int i_pixel = m0->i_pixel; + const int bw = x264_pixel_size[i_pixel].w; + const int bh = x264_pixel_size[i_pixel].h; + const int bs = bw*bh; + const int16_t *p_cost_m0x = m0->p_cost_mv - x264_clip3( m0->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); + const int16_t *p_cost_m0y = m0->p_cost_mv - x264_clip3( m0->mvp[1], h->mb.mv_min[0], h->mb.mv_max[0] ); + const int16_t *p_cost_m1x = m1->p_cost_mv - x264_clip3( m1->mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] ); + const int16_t *p_cost_m1y = m1->p_cost_mv - x264_clip3( m1->mvp[1], h->mb.mv_min[0], h->mb.mv_max[0] ); + DECLARE_ALIGNED( uint8_t, pix0[9][16*16], 16 ); + DECLARE_ALIGNED( uint8_t, pix1[9][16*16], 16 ); + DECLARE_ALIGNED( uint8_t, pix[16*16], 16 ); + int bm0x = m0->mv[0], om0x = bm0x; + int bm0y = m0->mv[1], om0y = bm0y; + int bm1x = m1->mv[0], om1x = bm1x; + int bm1y = m1->mv[1], om1y = bm1y; + int bcost = COST_MAX; + int pass = 0; + uint8_t visited[8][8][8][8]; + memset( visited, 0, sizeof(visited) ); + + BIME_CACHE( 0, 0 ); + CHECK_BIDIR( 0, 0, 0, 0 ); + + for( pass = 0; pass < 8; pass++ ) + { + /* check all mv pairs that differ in at most 2 components from the current mvs. */ + /* doesn't do chroma ME. this probably doesn't matter, as the gains + * from bidir ME are the same with and without chroma ME. */ + + BIME_CACHE2( 1, 0 ); + BIME_CACHE2( 0, 1 ); + BIME_CACHE2( 1, 1 ); + BIME_CACHE2( 1,-1 ); + + CHECK_BIDIR8( 0, 0, 0, 1 ); + CHECK_BIDIR8( 0, 0, 1, 1 ); + CHECK_BIDIR2( 0, 1, 0, 1 ); + CHECK_BIDIR2( 1, 0, 1, 0 ); + CHECK_BIDIR8( 0, 0,-1, 1 ); + CHECK_BIDIR2( 0,-1, 0, 1 ); + CHECK_BIDIR2(-1, 0, 1, 0 ); + + if( om0x == bm0x && om0y == bm0y && om1x == bm1x && om1y == bm1y ) + break; + + om0x = bm0x; + om0y = bm0y; + om1x = bm1x; + om1y = bm1y; + BIME_CACHE( 0, 0 ); + } + + m0->mv[0] = bm0x; + m0->mv[1] = bm0y; + m1->mv[0] = bm1x; + m1->mv[1] = bm1y; + return bcost; +} diff --git a/encoder/me.h b/encoder/me.h index 18ad9078..03678c82 100644 --- a/encoder/me.h +++ b/encoder/me.h @@ -52,5 +52,6 @@ static inline void x264_me_search( x264_t *h, x264_me_t *m, int (*mvc)[2], int i { x264_me_search_ref( h, m, mvc, i_mvc, NULL ); } void x264_me_refine_qpel( x264_t *h, x264_me_t *m ); +int x264_me_refine_bidir( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight ); #endif diff --git a/x264.c b/x264.c index 58898a4f..0bc496af 100644 --- a/x264.c +++ b/x264.c @@ -256,6 +256,7 @@ static void Help( x264_param_t *defaults ) " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" " --mixed-refs Decide references on a per partition basis\n" " --no-chroma-me Ignore chroma in motion estimation\n" + " --bime Jointly optimize both MVs in B-frames\n" " -8, --8x8dct Adaptive spatial transform size\n" " -t, --trellis Trellis RD quantization. Requires CABAC. [%d]\n" " - 0: disabled\n" @@ -480,6 +481,7 @@ static int Parse( int argc, char **argv, #define OPT_CRF 315 #define OPT_B_RDO 316 #define OPT_NO_FAST_PSKIP 317 +#define OPT_BIME 318 static struct option long_options[] = { @@ -513,9 +515,10 @@ static int Parse( int argc, char **argv, { "me", required_argument, NULL, OPT_ME }, { "merange", required_argument, NULL, OPT_MERANGE }, { "subme", required_argument, NULL, 'm' }, - { "b-rdo", no_argument, NULL, OPT_B_RDO }, + { "b-rdo", no_argument, NULL, OPT_B_RDO }, { "mixed-refs", no_argument, NULL, OPT_MIXED_REFS }, { "no-chroma-me", no_argument, NULL, OPT_NO_CHROMA_ME }, + { "bime", no_argument, NULL, OPT_BIME }, { "8x8dct", no_argument, NULL, '8' }, { "trellis", required_argument, NULL, 't' }, { "no-fast-pskip", no_argument, NULL, OPT_NO_FAST_PSKIP }, @@ -745,6 +748,9 @@ static int Parse( int argc, char **argv, case OPT_NO_CHROMA_ME: param->analyse.b_chroma_me = 0; break; + case OPT_BIME: + param->analyse.b_bidir_me = 1; + break; case '8': param->analyse.b_transform_8x8 = 1; break; diff --git a/x264.h b/x264.h index 1de65f7f..993b8e3a 100644 --- a/x264.h +++ b/x264.h @@ -35,7 +35,7 @@ #include -#define X264_BUILD 42 +#define X264_BUILD 43 /* x264_t: * opaque handler for decoder and encoder */ @@ -196,6 +196,7 @@ typedef struct int i_me_range; /* integer pixel motion estimation search range (from predicted mv) */ int i_mv_range; /* maximum length of a mv (in pixels) */ int i_subpel_refine; /* subpixel motion estimation quality */ + int b_bidir_me; /* jointly optimize both MVs in B-frames */ int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */ int b_bframe_rdo; /* RD based mode decision for B-frames */ int b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */ -- 2.50.0