From: Dan Larkin Date: Wed, 13 Jul 2011 17:45:23 +0000 (-0500) Subject: Add --subme 11, which disables all early terminations in analysis X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4a88ee1c649d92bbdbbf128e22d547e9b833f00c;p=libx264 Add --subme 11, which disables all early terminations in analysis Necessary for a future trellis mode decision/motion estimation patch. Also add the slowest presets to the regression test. --- diff --git a/common/common.c b/common/common.c index addd5cd0..6705d8ec 100644 --- a/common/common.c +++ b/common/common.c @@ -275,7 +275,7 @@ static int x264_param_apply_preset( x264_param_t *param, const char *preset ) else if( !strcasecmp( preset, "placebo" ) ) { param->analyse.i_me_method = X264_ME_TESA; - param->analyse.i_subpel_refine = 10; + param->analyse.i_subpel_refine = 11; param->analyse.i_me_range = 24; param->i_frame_reference = 16; param->i_bframe_adaptive = X264_B_ADAPT_TRELLIS; diff --git a/encoder/analyse.c b/encoder/analyse.c index 833403ad..5e8be51c 100644 --- a/encoder/analyse.c +++ b/encoder/analyse.c @@ -132,6 +132,7 @@ typedef struct int i_mb_type8x16; int b_direct_available; + int b_early_terminate; } x264_mb_analysis_t; @@ -416,6 +417,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp ) /* mbrd == 3 -> QPRD */ a->i_mbrd = (subme>=6) + (subme>=8) + (h->param.analyse.i_subpel_refine>=10); h->mb.b_deblock_rdo = h->param.analyse.i_subpel_refine >= 9 && h->sh.i_disable_deblocking_filter_idc != 1; + a->b_early_terminate = h->param.analyse.i_subpel_refine < 11; x264_mb_analyse_init_qp( h, a, qp ); @@ -560,7 +562,7 @@ static void x264_mb_analyse_init( x264_t *h, x264_mb_analysis_t *a, int qp ) } /* Fast intra decision */ - if( h->mb.i_mb_xy - h->sh.i_first_mb > 4 ) + if( a->b_early_terminate && h->mb.i_mb_xy - h->sh.i_first_mb > 4 ) { /* Always run in fast-intra mode for subme < 3 */ if( h->mb.i_subpel_refine > 2 && @@ -927,7 +929,7 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ } /* Not heavily tuned */ static const uint8_t i8x8_thresh[11] = { 4, 4, 4, 5, 5, 5, 6, 6, 6, 6, 6 }; - if( X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 ) + if( a->b_early_terminate && X264_MIN(i_cost, a->i_satd_i16x16) > (i_satd_inter*i8x8_thresh[h->mb.i_subpel_refine])>>2 ) return; } @@ -935,10 +937,10 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ if( flags & X264_ANALYSE_I4x4 ) { int i_cost = lambda * (24+16); /* 24from JVT (SATD0), 16 from base predmode costs */ - int i_satd_thresh = X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 ); + int i_satd_thresh = a->b_early_terminate ? X264_MIN3( i_satd_inter, a->i_satd_i16x16, a->i_satd_i8x8 ) : COST_MAX; h->mb.i_cbp_luma = 0; - if( a->i_mbrd ) + if( a->b_early_terminate && a->i_mbrd ) i_satd_thresh = i_satd_thresh * (10-a->b_fast_intra)/8; if( h->sh.i_type == SLICE_TYPE_B ) @@ -1033,6 +1035,9 @@ static void x264_mb_analyse_intra( x264_t *h, x264_mb_analysis_t *a, int i_satd_ static void x264_intra_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_thresh ) { + if( !a->b_early_terminate ) + i_satd_thresh = COST_MAX; + if( a->i_satd_i16x16 < i_satd_thresh ) { h->mb.i_type = I_16x16; @@ -1072,7 +1077,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) { int old_pred_mode = a->i_predict16x16; const int8_t *predict_mode = predict_16x16_mode_available( h->mb.i_neighbour_intra ); - int i_thresh = a->i_satd_i16x16_dir[old_pred_mode] * 9/8; + int i_thresh = a->b_early_terminate ? a->i_satd_i16x16_dir[old_pred_mode] * 9/8 : COST_MAX; i_best = a->i_satd_i16x16; for( ; *predict_mode >= 0; predict_mode++ ) { @@ -1093,7 +1098,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) { int8_t predict_mode_sorted[4]; int i_max; - int i_thresh = a->i_satd_i8x8chroma * 5/4; + int i_thresh = a->b_early_terminate ? a->i_satd_i8x8chroma * 5/4 : COST_MAX; for( i_max = 0; *predict_mode >= 0; predict_mode++ ) { @@ -1197,7 +1202,7 @@ static void x264_intra_rd_refine( x264_t *h, x264_mb_analysis_t *a ) h->mb.pic.p_fdec[1] + 8*x + 8*y*FDEC_STRIDE, h->mb.pic.p_fdec[2] + 8*x + 8*y*FDEC_STRIDE}; int cbp_luma_new = 0; - int i_thresh = a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8; + int i_thresh = a->b_early_terminate ? a->i_satd_i8x8_dir[a->i_predict8x8[idx]][idx] * 11/8 : COST_MAX; i_best = COST_MAX64; @@ -1298,7 +1303,7 @@ static void x264_mb_analyse_inter_p16x16( x264_t *h, x264_mb_analysis_t *a ) int i_mvc; ALIGNED_4( int16_t mvc[8][2] ); int i_halfpel_thresh = INT_MAX; - int *p_halfpel_thresh = h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh : NULL; + int *p_halfpel_thresh = (a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh : NULL; /* 16x16 Search on all ref frame */ m.i_pixel = PIXEL_16x16; @@ -1388,8 +1393,8 @@ static void x264_mb_analyse_inter_p8x8_mixed_ref( x264_t *h, x264_mb_analysis_t /* early termination: if 16x16 chose ref 0, then evalute no refs older * than those used by the neighbors */ - if( i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) && - h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0 ) + if( a->b_early_terminate && (i_maxref > 0 && (a->l0.me16x16.i_ref == 0 || a->l0.me16x16.i_ref == h->mb.ref_blind_dupe) && + h->mb.i_mb_type_top > 0 && h->mb.i_mb_type_left[0] > 0) ) { i_maxref = 0; CHECK_NEIGHBOUR( -8 - 1 ); @@ -1572,7 +1577,7 @@ static void x264_mb_analyse_inter_p16x8( x264_t *h, x264_mb_analysis_t *a, int i /* Early termination based on the current SATD score of partition[0] plus the estimated SATD score of partition[1] */ - if( !i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 ) + if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est16x8[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) ) { a->l0.i_cost16x8 = COST_MAX; return; @@ -1637,7 +1642,7 @@ static void x264_mb_analyse_inter_p8x16( x264_t *h, x264_mb_analysis_t *a, int i /* Early termination based on the current SATD score of partition[0] plus the estimated SATD score of partition[1] */ - if( !i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4 ) + if( a->b_early_terminate && (!i && l0m->cost + a->i_cost_est8x16[1] > i_best_satd * (4 + !!a->i_mbrd) / 4) ) { a->l0.i_cost8x16 = COST_MAX; return; @@ -1923,8 +1928,8 @@ static void x264_mb_analyse_inter_b16x16( x264_t *h, x264_mb_analysis_t *a ) int try_skip = a->b_try_skip; int list1_skipped = 0; int i_halfpel_thresh[2] = {INT_MAX, INT_MAX}; - int *p_halfpel_thresh[2] = {h->mb.pic.i_fref[0]>1 ? &i_halfpel_thresh[0] : NULL, - h->mb.pic.i_fref[1]>1 ? &i_halfpel_thresh[1] : NULL}; + int *p_halfpel_thresh[2] = {(a->b_early_terminate && h->mb.pic.i_fref[0]>1) ? &i_halfpel_thresh[0] : NULL, + (a->b_early_terminate && h->mb.pic.i_fref[1]>1) ? &i_halfpel_thresh[1] : NULL}; x264_me_t m; m.i_pixel = PIXEL_16x16; @@ -2454,8 +2459,8 @@ static void x264_mb_analyse_inter_b16x8( x264_t *h, x264_mb_analysis_t *a, int i /* Early termination based on the current SATD score of partition[0] plus the estimated SATD score of partition[1] */ - if( !i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd - * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 ) + if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est16x8[1] > i_best_satd + * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) ) { a->i_cost16x8bi = COST_MAX; return; @@ -2547,8 +2552,8 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i /* Early termination based on the current SATD score of partition[0] plus the estimated SATD score of partition[1] */ - if( !i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd - * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16 ) + if( a->b_early_terminate && (!i && i_part_cost + a->i_cost_est8x16[1] > i_best_satd + * (16 + (!!a->i_mbrd + !!h->mb.i_psy_rd))/16) ) { a->i_cost8x16bi = COST_MAX; return; @@ -2566,10 +2571,10 @@ static void x264_mb_analyse_inter_b8x16( x264_t *h, x264_mb_analysis_t *a, int i static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) { - int thresh = i_satd * 5/4 + 1; + int thresh = a->b_early_terminate ? i_satd * 5/4 + 1 : COST_MAX; h->mb.i_type = P_L0; - if( a->l0.i_rd16x16 == COST_MAX && a->l0.me16x16.cost <= i_satd * 3/2 ) + if( a->l0.i_rd16x16 == COST_MAX && (!a->b_early_terminate || a->l0.me16x16.cost <= i_satd * 3/2) ) { h->mb.i_partition = D_16x16; x264_analyse_update_cache( h, a ); @@ -2609,7 +2614,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) for( int i = 0; i < 4; i++ ) { int costs[4] = {a->l0.i_cost4x4[i], a->l0.i_cost8x4[i], a->l0.i_cost4x8[i], a->l0.me8x8[i].cost}; - int sub8x8_thresh = X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4; + int sub8x8_thresh = a->b_early_terminate ? X264_MIN4( costs[0], costs[1], costs[2], costs[3] ) * 5 / 4 : COST_MAX; int subtype, btype = D_L0_8x8; uint64_t bcost = COST_MAX64; for( subtype = D_L0_4x4; subtype <= D_L0_8x8; subtype++ ) @@ -2639,7 +2644,7 @@ static void x264_mb_analyse_p_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd ) static void x264_mb_analyse_b_rd( x264_t *h, x264_mb_analysis_t *a, int i_satd_inter ) { - int thresh = i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1; + int thresh = a->b_early_terminate ? i_satd_inter * (17 + (!!h->mb.i_psy_rd))/16 + 1 : COST_MAX; if( a->b_direct_available && a->i_rd16x16direct == COST_MAX ) { @@ -3020,8 +3025,8 @@ intra_analysis: i_partition = D_16x16; i_cost = analysis.l0.me16x16.cost; - if( ( flags & X264_ANALYSE_PSUB16x16 ) && - analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost ) + if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate || + analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost) ) { i_type = P_8x8; i_partition = D_8x8; @@ -3033,7 +3038,7 @@ intra_analysis: for( int i = 0; i < 4; i++ ) { x264_mb_analyse_inter_p4x4( h, &analysis, i ); - if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost ) + if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost ) { int i_cost8x8 = analysis.l0.i_cost4x4[i]; h->mb.i_sub_partition[i] = D_L0_4x4; @@ -3056,8 +3061,8 @@ intra_analysis: /* Now do 16x8/8x16 */ i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv; - if( ( flags & X264_ANALYSE_PSUB16x16 ) && - analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8 ) + if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate || + analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8) ) { int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1; @@ -3364,7 +3369,7 @@ intra_analysis: COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI ); COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT ); - if( analysis.i_mbrd && analysis.i_cost16x16direct <= i_cost * 33/32 ) + if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 ) { x264_mb_analyse_b_rd( h, &analysis, i_cost ); if( i_bskip_cost < analysis.i_rd16x16direct && @@ -3431,17 +3436,17 @@ intra_analysis: /* We can gain a little speed by checking the mode with the lowest estimated cost first */ int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total; - if( try_16x8_first && i_cost_est16x8bi_total < i_cost ) + if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); } - if( i_cost_est8x16bi_total < i_cost ) + if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost ) { x264_mb_analyse_inter_b8x16( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 ); } - if( !try_16x8_first && i_cost_est16x8bi_total < i_cost ) + if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); diff --git a/encoder/encoder.c b/encoder/encoder.c index 7d112ec0..39b17a75 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -503,7 +503,7 @@ static int x264_validate_parameters( x264_t *h, int b_open ) h->param.rc.f_rf_constant = x264_clip3f( h->param.rc.f_rf_constant, -QP_BD_OFFSET, 51 ); h->param.rc.f_rf_constant_max = x264_clip3f( h->param.rc.f_rf_constant_max, -QP_BD_OFFSET, 51 ); h->param.rc.i_qp_constant = x264_clip3( h->param.rc.i_qp_constant, 0, QP_MAX ); - h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 10 ); + h->param.analyse.i_subpel_refine = x264_clip3( h->param.analyse.i_subpel_refine, 0, 11 ); h->param.rc.f_ip_factor = X264_MAX( h->param.rc.f_ip_factor, 0.01f ); h->param.rc.f_pb_factor = X264_MAX( h->param.rc.f_pb_factor, 0.01f ); if( h->param.rc.i_rc_method == X264_RC_CRF ) @@ -784,7 +784,7 @@ static int x264_validate_parameters( x264_t *h, int b_open ) h->param.rc.f_aq_strength = 0; } h->param.analyse.i_noise_reduction = x264_clip3( h->param.analyse.i_noise_reduction, 0, 1<<16 ); - if( h->param.analyse.i_subpel_refine == 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) ) + if( h->param.analyse.i_subpel_refine >= 10 && (h->param.analyse.i_trellis != 2 || !h->param.rc.i_aq_mode) ) h->param.analyse.i_subpel_refine = 9; { diff --git a/encoder/me.c b/encoder/me.c index 68518647..e21f2ca8 100644 --- a/encoder/me.c +++ b/encoder/me.c @@ -46,6 +46,7 @@ static const uint8_t subpel_iterations[][4] = {0,0,2,2}, {0,0,4,10}, {0,0,4,10}, + {0,0,4,10}, {0,0,4,10}}; /* (x-1)%6 */ diff --git a/tools/test_x264.py b/tools/test_x264.py index 2fee3458..a09e5eb8 100755 --- a/tools/test_x264.py +++ b/tools/test_x264.py @@ -44,7 +44,9 @@ OPTIONS = [ "fast", "medium", "slow", - "slower") ] + "slower", + "veryslow", + "placebo") ] ] # end options diff --git a/x264.c b/x264.c index bfa67b44..9aa03bfd 100644 --- a/x264.c +++ b/x264.c @@ -479,7 +479,7 @@ static void help( x264_param_t *defaults, int longhelp ) " --bframes 16 --b-adapt 2 --direct auto\n" " --slow-firstpass --no-fast-pskip\n" " --me tesa --merange 24 --partitions all\n" - " --rc-lookahead 60 --ref 16 --subme 10\n" + " --rc-lookahead 60 --ref 16 --subme 11\n" " --trellis 2\n" ); else H0( " - ultrafast,superfast,veryfast,faster,fast\n" " - medium,slow,slower,veryslow,placebo\n" ); @@ -650,8 +650,9 @@ static void help( x264_param_t *defaults, int longhelp ) " - 7: RD mode decision for all frames\n" " - 8: RD refinement for I/P-frames\n" " - 9: RD refinement for all frames\n" - " - 10: QP-RD - requires trellis=2, aq-mode>0\n" ); - else H1( " decision quality: 1=fast, 10=best.\n" ); + " - 10: QP-RD - requires trellis=2, aq-mode>0\n" + " - 11: Full RD: disable all early terminations\n" ); + else H1( " decision quality: 1=fast, 11=best\n" ); H1( " --psy-rd Strength of psychovisual optimization [\"%.1f:%.1f\"]\n" " #1: RD (requires subme>=6)\n" " #2: Trellis (requires trellis, experimental)\n",