From: Loren Merritt Date: Fri, 18 Nov 2005 11:20:07 +0000 (+0000) Subject: RD mode decision for B-frames (--b-rdo) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6fe92323b2e007e3a31714fb5b090b732fc24e62;p=libx264 RD mode decision for B-frames (--b-rdo) patch by Alex Wright. git-svn-id: svn://svn.videolan.org/x264/trunk@368 df754926-b1dd-0310-bc7b-ec298dee348c --- diff --git a/encoder/analyse.c b/encoder/analyse.c index 493228dc..2fa52854 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -188,7 +188,8 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int i_qp ) a->i_qp = h->mb.i_qp = i_qp; a->i_lambda = i_qp0_cost_table[i_qp]; a->i_lambda2 = i_qp0_cost2_table[i_qp]; - a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && h->sh.i_type != SLICE_TYPE_B; + a->b_mbrd = h->param.analyse.i_subpel_refine >= 6 && + ( h->sh.i_type != SLICE_TYPE_B || h->param.analyse.b_bframe_rdo ); h->mb.i_me_method = h->param.analyse.i_me_method; h->mb.i_subpel_refine = h->param.analyse.i_subpel_refine; @@ -460,8 +461,12 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_cost_ int i, idx; int i_max; int predict_mode[9]; + int i_satd_thresh; - const int i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10; + if( h->sh.i_type == SLICE_TYPE_B ) + i_satd_thresh = a->i_best_satd * 9/8; + else + i_satd_thresh = a->i_best_satd * 5/4 + a->i_lambda * 10; /*---------------- Try all mode and calculate their score ---------------*/ @@ -1143,8 +1148,16 @@ static void x264_mb_analyse_inter_direct( x264_t *h, x264_mb_analysis_t *a ) /* mb type cost */ a->i_cost8x8direct[i] += a->i_lambda * i_sub_mb_b_cost_table[D_DIRECT_8x8]; } - a->i_cost16x16direct += a->i_lambda * i_mb_b_cost_table[B_DIRECT]; + + if( a->b_mbrd ) + { + if( a->i_cost16x16direct < a->i_best_satd ) + a->i_best_satd = a->i_cost16x16direct; + + h->mb.i_type = B_DIRECT; + a->i_cost16x16direct = x264_rd_cost_mb( h, a->i_lambda2 ); + } } #define WEIGHTED_AVG( size, pix1, stride1, src2, stride2 ) \ @@ -1278,6 +1291,31 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) a->i_cost16x16bi += a->i_lambda * i_mb_b_cost_table[B_BI_BI]; a->l0.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L0_L0]; a->l1.me16x16.cost += a->i_lambda * i_mb_b_cost_table[B_L1_L1]; + + if( a->b_mbrd ) + { + if( a->l0.me16x16.cost < a->i_best_satd ) + a->i_best_satd = a->l0.me16x16.cost; + if( a->l1.me16x16.cost < a->i_best_satd ) + a->i_best_satd = a->l1.me16x16.cost; + if( a->i_cost16x16bi < a->i_best_satd ) + a->i_best_satd = a->i_cost16x16bi; + + h->mb.i_partition = D_16x16; + /* L0 */ + h->mb.i_type = B_L0_L0; + x264_macroblock_cache_mv( h, 0, 0, 4, 4, 0, a->l0.me16x16.mv[0], a->l0.me16x16.mv[1] ); + a->l0.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 ); + + /* L1 */ + h->mb.i_type = B_L1_L1; + x264_macroblock_cache_mv( h, 0, 0, 4, 4, 1, a->l1.me16x16.mv[0], a->l1.me16x16.mv[1] ); + a->l1.me16x16.cost = x264_rd_cost_mb( h, a->i_lambda2 ); + + /* BI */ + h->mb.i_type = B_BI_BI; + a->i_cost16x16bi = x264_rd_cost_mb( h, a->i_lambda2 ); + } } static inline void x264_mb_cache_mv_p8x8( x264_t *h, x264_mb_analysis_t *a, int i ) @@ -1439,6 +1477,16 @@ static void x264_mb_analyse_inter_b8x8( x264_t *h, x264_mb_analysis_t *a ) /* mb type cost */ a->i_cost8x8bi += a->i_lambda * i_mb_b_cost_table[B_8x8]; + + if( a->b_mbrd ) + { + if( a->i_cost8x8bi < a->i_best_satd ) + a->i_best_satd = a->i_cost8x8bi; + + h->mb.i_type = B_8x8; + h->mb.i_partition = D_8x8; + a->i_cost8x8bi = x264_rd_cost_mb( h, a->i_lambda2 ); + } } static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a ) @@ -1502,8 +1550,7 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a ) } a->i_cost16x8bi += i_part_cost; - if( i == 0 ) - x264_mb_cache_mv_b16x8( h, a, i, 0 ); + x264_mb_cache_mv_b16x8( h, a, i, 0 ); } /* mb type cost */ @@ -1511,6 +1558,16 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a ) + (a->i_mb_partition16x8[0]>>2) * 3 + (a->i_mb_partition16x8[1]>>2); a->i_cost16x8bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type16x8]; + + if( a->b_mbrd ) + { + if( a->i_cost16x8bi < a->i_best_satd ) + a->i_best_satd = a->i_cost16x8bi; + + h->mb.i_type = a->i_mb_type16x8; + h->mb.i_partition = D_16x8; + a->i_cost16x8bi = x264_rd_cost_mb( h, a->i_lambda2 ); + } } static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) { @@ -1572,8 +1629,7 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) } a->i_cost8x16bi += i_part_cost; - if( i == 0 ) - x264_mb_cache_mv_b8x16( h, a, i, 0 ); + x264_mb_cache_mv_b8x16( h, a, i, 0 ); } /* mb type cost */ @@ -1581,6 +1637,16 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a ) + (a->i_mb_partition8x16[0]>>2) * 3 + (a->i_mb_partition8x16[1]>>2); a->i_cost8x16bi += a->i_lambda * i_mb_b16x8_cost_table[a->i_mb_type8x16]; + + if( a->b_mbrd ) + { + if( a->i_cost8x16bi < a->i_best_satd ) + a->i_best_satd = a->i_cost8x16bi; + + h->mb.i_type = a->i_mb_type8x16; + h->mb.i_partition = D_8x16; + a->i_cost8x16bi = x264_rd_cost_mb( h, a->i_lambda2 ); + } } static inline void x264_mb_analyse_transform( x264_t *h ) @@ -1621,6 +1687,10 @@ static inline void x264_mb_analyse_transform_rd( x264_t *h, x264_mb_analysis_t * { if( *i_cost > 0 ) a->i_best_satd = (int64_t)a->i_best_satd * i_cost8 / *i_cost; + /* prevent a rare division by zero in x264_mb_analyse_intra */ + if( a->i_best_satd == 0 ) + a->i_best_satd = 1; + *i_cost = i_cost8; } else @@ -1892,6 +1962,7 @@ void x264_macroblock_analyse( x264_t *h ) } else if( h->sh.i_type == SLICE_TYPE_B ) { + int i_bskip_cost = COST_MAX; int b_skip = 0; analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h ); @@ -1899,15 +1970,34 @@ void x264_macroblock_analyse( x264_t *h ) { h->mb.i_type = B_SKIP; x264_mb_mc( h ); - - /* Conditioning the probe on neighboring block types - * doesn't seem to help speed or quality. */ - b_skip = !h->mb.b_lossless && x264_macroblock_probe_bskip( h ); + if( analysis.b_mbrd ) + { + i_bskip_cost = h->pixf.ssd[PIXEL_16x16]( h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0], + h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] ) + + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[1], h->mb.pic.i_stride[1], + h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] ) + + h->pixf.ssd[PIXEL_8x8]( h->mb.pic.p_fenc[2], h->mb.pic.i_stride[2], + h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] ); + + if( i_bskip_cost == 0 ) + { + h->mb.i_type = B_SKIP; + x264_analyse_update_cache( h, &analysis ); + return; + } + } + else + { + /* Conditioning the probe on neighboring block types + * doesn't seem to help speed or quality. */ + b_skip = !h->mb.b_lossless && x264_macroblock_probe_bskip( h ); + } } if( !b_skip ) { const unsigned int flags = h->param.analyse.inter; + int i_type; int i_partition; x264_mb_analyse_load_costs( h, &analysis ); @@ -1919,22 +2009,22 @@ void x264_macroblock_analyse( x264_t *h ) x264_mb_analyse_inter_b16x16( h, &analysis ); - h->mb.i_type = B_L0_L0; + i_type = B_L0_L0; i_partition = D_16x16; i_cost = analysis.l0.me16x16.cost; if( analysis.l1.me16x16.cost < i_cost ) { - h->mb.i_type = B_L1_L1; + i_type = B_L1_L1; i_cost = analysis.l1.me16x16.cost; } if( analysis.i_cost16x16bi < i_cost ) { - h->mb.i_type = B_BI_BI; + i_type = B_BI_BI; i_cost = analysis.i_cost16x16bi; } if( analysis.i_cost16x16direct < i_cost ) { - h->mb.i_type = B_DIRECT; + i_type = B_DIRECT; i_cost = analysis.i_cost16x16direct; } @@ -1943,7 +2033,7 @@ void x264_macroblock_analyse( x264_t *h ) x264_mb_analyse_inter_b8x8( h, &analysis ); if( analysis.i_cost8x8bi < i_cost ) { - h->mb.i_type = B_8x8; + i_type = B_8x8; i_partition = D_8x8; i_cost = analysis.i_cost8x8bi; @@ -1955,7 +2045,7 @@ void x264_macroblock_analyse( x264_t *h ) { i_partition = D_16x8; i_cost = analysis.i_cost16x8bi; - h->mb.i_type = analysis.i_mb_type16x8; + i_type = analysis.i_mb_type16x8; } } if( h->mb.i_sub_partition[0] == h->mb.i_sub_partition[2] || @@ -1966,7 +2056,7 @@ void x264_macroblock_analyse( x264_t *h ) { i_partition = D_8x16; i_cost = analysis.i_cost8x16bi; - h->mb.i_type = analysis.i_mb_type8x16; + i_type = analysis.i_mb_type8x16; } } } @@ -1974,24 +2064,38 @@ void x264_macroblock_analyse( x264_t *h ) h->mb.i_partition = i_partition; + if( analysis.b_mbrd ) + { + if( i_bskip_cost <= i_cost ) + { + h->mb.i_type = B_SKIP; + x264_analyse_update_cache( h, &analysis ); + return; + } + else + { + h->mb.i_type = i_type; + x264_mb_analyse_transform_rd( h, &analysis, &i_cost ); + } + } /* refine qpel */ - if( i_partition == D_16x16 ) + else if( i_partition == D_16x16 ) { analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0]; analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1]; - if( h->mb.i_type == B_L0_L0 ) + if( i_type == B_L0_L0 ) { x264_me_refine_qpel( h, &analysis.l0.me16x16 ); i_cost = analysis.l0.me16x16.cost + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0]; } - else if( h->mb.i_type == B_L1_L1 ) + else if( i_type == B_L1_L1 ) { x264_me_refine_qpel( h, &analysis.l1.me16x16 ); i_cost = analysis.l1.me16x16.cost + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1]; } - else if( h->mb.i_type == B_BI_BI ) + else if( i_type == B_BI_BI ) { x264_me_refine_qpel( h, &analysis.l0.me16x16 ); x264_me_refine_qpel( h, &analysis.l1.me16x16 ); @@ -2058,19 +2162,21 @@ void x264_macroblock_analyse( x264_t *h ) if( analysis.i_sad_i16x16 < i_cost ) { - h->mb.i_type = I_16x16; + i_type = I_16x16; i_cost = analysis.i_sad_i16x16; } if( analysis.i_sad_i8x8 < i_cost ) { - h->mb.i_type = I_8x8; + i_type = I_8x8; i_cost = analysis.i_sad_i8x8; } if( analysis.i_sad_i4x4 < i_cost ) { - h->mb.i_type = I_4x4; + i_type = I_4x4; i_cost = analysis.i_sad_i4x4; } + + h->mb.i_type = i_type; } } @@ -2159,8 +2265,6 @@ static void x264_analyse_update_cache( x264_t *h, x264_mb_analysis_t *a ) } case B_SKIP: - /* nothing has changed since x264_macroblock_probe_bskip */ - break; case B_DIRECT: x264_mb_load_mv_direct8x8( h, 0 ); x264_mb_load_mv_direct8x8( h, 1 ); diff --git a/x264.c b/x264.c index 95573936..50f503f7 100644 --- a/x264.c +++ b/x264.c @@ -253,6 +253,7 @@ static void Help( x264_param_t *defaults ) " --merange Maximum motion vector search range [%d]\n" " -m, --subme Subpixel motion estimation and partition\n" " decision quality: 1=fast, 6=best. [%d]\n" + " --b-rdo RD based mode decision for B-frames. Requires subme 6.\n" " --mixed-refs Decide references on a per partition basis\n" " --no-chroma-me Ignore chroma in motion estimation\n" " -8, --8x8dct Adaptive spatial transform size\n" @@ -476,6 +477,7 @@ static int Parse( int argc, char **argv, #define OPT_CHROMALOC 313 #define OPT_MIXED_REFS 314 #define OPT_CRF 315 +#define OPT_B_RDO 316 static struct option long_options[] = { @@ -509,6 +511,7 @@ static int Parse( int argc, char **argv, { "me", required_argument, NULL, OPT_ME }, { "merange", required_argument, NULL, OPT_MERANGE }, { "subme", required_argument, NULL, 'm' }, + { "b-rdo", no_argument, NULL, OPT_B_RDO }, { "mixed-refs", no_argument, NULL, OPT_MIXED_REFS }, { "no-chroma-me", no_argument, NULL, OPT_NO_CHROMA_ME }, { "8x8dct", no_argument, NULL, '8' }, @@ -730,6 +733,9 @@ static int Parse( int argc, char **argv, case 'm': param->analyse.i_subpel_refine = atoi(optarg); break; + case OPT_B_RDO: + param->analyse.b_bframe_rdo = 1; + break; case OPT_MIXED_REFS: param->analyse.b_mixed_references = 1; break; diff --git a/x264.h b/x264.h index b63b844e..1b914121 100644 --- a/x264.h +++ b/x264.h @@ -197,6 +197,7 @@ typedef struct int i_mv_range; /* maximum length of a mv (in pixels) */ int i_subpel_refine; /* subpixel motion estimation quality */ int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */ + int b_bframe_rdo; /* RD based mode decision for B-frames */ int b_mixed_references; /* allow each mb partition in P-frames to have it's own reference number */ int i_trellis; /* trellis RD quantization */