From 2ffe64ad5ceb7443fefceab0adf6722428825efe Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Fri, 16 Aug 2013 13:51:00 -0700 Subject: [PATCH] Cleanup/enhancements of switchable filter search Cleans up the switchable filter search logic. Also adds a speed feature - a variance threshold - to disable filter search if source variance is lower than this value. Results: derfraw300 threshold = 16, psnr -0.238%, 4-5% speedup (tested on football) threshold = 32, psnr -0.381%, 8-9% speedup (tested on football) threshold = 64, psnr -0.611%, 12-13% speedup (tested on football) threshold = 96, psnr -0.804%, 16-17% speedup (tested on football) Based on these results, the threshold is chosen as 16 for speed 1, 32 for speed 2, 64 for speed 3 and 96 for speed 4. Change-Id: Ib630d39192773b1983d3d349b97973768e170c04 --- vp9/encoder/vp9_onyx_if.c | 6 +- vp9/encoder/vp9_onyx_int.h | 4 +- vp9/encoder/vp9_rdopt.c | 290 +++++++++++++++++++------------------ 3 files changed, 157 insertions(+), 143 deletions(-) diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 44e9aa5db..13b8c82f2 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -716,7 +716,6 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_lastframe_partitioning = 0; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; - sf->use_8tap_always = 0; sf->use_avoid_tested_higherror = 0; sf->reference_masking = 0; sf->skip_lots_of_modes = 0; @@ -735,6 +734,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_splitmv = 0; sf->mode_search_skip_flags = 0; sf->disable_split_var_thresh = 0; + sf->disable_filter_search_var_thresh = 0; sf->last_chroma_intra_mode = TM_PRED; sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; @@ -794,6 +794,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->auto_min_max_partition_size = 1; sf->auto_min_max_partition_interval = 1; sf->disable_split_var_thresh = 32; + sf->disable_filter_search_var_thresh = 16; } if (speed == 2) { sf->adjust_thresholds_by_speed = 1; @@ -830,6 +831,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->disable_split_var_thresh = 64; sf->auto_min_max_partition_size = 1; sf->auto_min_max_partition_interval = 2; + sf->disable_filter_search_var_thresh = 32; } if (speed == 3) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -853,6 +855,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = BIGDIA; sf->subpel_iters_per_step = 1; sf->disable_split_var_thresh = 64; + sf->disable_filter_search_var_thresh = 64; } if (speed == 4) { sf->comp_inter_joint_search_thresh = BLOCK_SIZES; @@ -880,6 +883,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->search_method = HEX; sf->subpel_iters_per_step = 1; sf->disable_split_var_thresh = 64; + sf->disable_filter_search_var_thresh = 96; } /* if (speed == 2) { diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index de6f34c58..1b0e5cea0 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -259,7 +259,6 @@ typedef struct { int use_lastframe_partitioning; TX_SIZE_SEARCH_METHOD tx_size_search_method; int use_lp32x32fdct; - int use_8tap_always; int use_avoid_tested_higherror; int skip_lots_of_modes; int adjust_thresholds_by_speed; @@ -286,6 +285,9 @@ typedef struct { unsigned int mode_search_skip_flags; // A source variance threshold below which the split mode is disabled unsigned int disable_split_var_thresh; + // A source variance threshold below which filter search is disabled + // Choose a very large value (UINT_MAX) to use 8-tap always + unsigned int disable_filter_search_var_thresh; MB_PREDICTION_MODE last_chroma_intra_mode; int use_rd_breakout; int use_uv_intra_rd_estimate; diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 36a772200..be4ca9311 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -2625,7 +2625,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t this_rd = 0; DECLARE_ALIGNED_ARRAY(16, uint8_t, tmp_buf, MAX_MB_PLANE * 64 * 64); int pred_exists = 0; - int interpolating_intpel_seen = 0; int intpel_mv; int64_t rd, best_rd = INT64_MAX; int best_needs_copy = 0; @@ -2738,7 +2737,6 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, } pred_exists = 0; - interpolating_intpel_seen = 0; // Are all MVs integer pel for Y and UV intpel_mv = (mbmi->mv[0].as_mv.row & 15) == 0 && (mbmi->mv[0].as_mv.col & 15) == 0; @@ -2747,97 +2745,97 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, (mbmi->mv[1].as_mv.col & 15) == 0; // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used - *best_filter = EIGHTTAP; - if (cpi->sf.use_8tap_always) { + if (cm->mcomp_filter_type != BILINEAR) { *best_filter = EIGHTTAP; - vp9_zero(cpi->rd_filter_cache); - } else { - int i, newbest; - int tmp_rate_sum = 0; - int64_t tmp_dist_sum = 0; - - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; - for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { - int j; - int64_t rs_rd; - const int is_intpel_interp = intpel_mv; - mbmi->interp_filter = i; - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); - rs = get_switchable_rate(x); - rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - - if (interpolating_intpel_seen && is_intpel_interp) { - cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, - tmp_rate_sum, tmp_dist_sum); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], - cpi->rd_filter_cache[i] + rs_rd); - rd = cpi->rd_filter_cache[i]; - if (cm->mcomp_filter_type == SWITCHABLE) - rd += rs_rd; - } else { - int rate_sum = 0; - int64_t dist_sum = 0; - if ((cm->mcomp_filter_type == SWITCHABLE && - (!i || best_needs_copy)) || - (cm->mcomp_filter_type != SWITCHABLE && - (cm->mcomp_filter_type == mbmi->interp_filter || - (!interpolating_intpel_seen && is_intpel_interp)))) { - for (j = 0; j < MAX_MB_PLANE; j++) { - xd->plane[j].dst.buf = orig_dst[j]; - xd->plane[j].dst.stride = orig_dst_stride[j]; - } + if (x->source_variance < + cpi->sf.disable_filter_search_var_thresh) { + *best_filter = EIGHTTAP; + vp9_zero(cpi->rd_filter_cache); + } else { + int i, newbest; + int tmp_rate_sum = 0; + int64_t tmp_dist_sum = 0; + + cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; + for (i = 0; i < VP9_SWITCHABLE_FILTERS; ++i) { + int j; + int64_t rs_rd; + mbmi->interp_filter = i; + vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + rs = get_switchable_rate(x); + rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); + + if (i > 0 && intpel_mv) { + cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, + tmp_rate_sum, tmp_dist_sum); + cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + cpi->rd_filter_cache[i] + rs_rd); + rd = cpi->rd_filter_cache[i]; + if (cm->mcomp_filter_type == SWITCHABLE) + rd += rs_rd; } else { - for (j = 0; j < MAX_MB_PLANE; j++) { - xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; - xd->plane[j].dst.stride = 64; + int rate_sum = 0; + int64_t dist_sum = 0; + if ((cm->mcomp_filter_type == SWITCHABLE && + (!i || best_needs_copy)) || + (cm->mcomp_filter_type != SWITCHABLE && + (cm->mcomp_filter_type == mbmi->interp_filter || + (i == 0 && intpel_mv)))) { + for (j = 0; j < MAX_MB_PLANE; j++) { + xd->plane[j].dst.buf = orig_dst[j]; + xd->plane[j].dst.stride = orig_dst_stride[j]; + } + } else { + for (j = 0; j < MAX_MB_PLANE; j++) { + xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; + xd->plane[j].dst.stride = 64; + } + } + vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); + model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); + cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, + rate_sum, dist_sum); + cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + cpi->rd_filter_cache[i] + rs_rd); + rd = cpi->rd_filter_cache[i]; + if (cm->mcomp_filter_type == SWITCHABLE) + rd += rs_rd; + if (i == 0 && intpel_mv) { + tmp_rate_sum = rate_sum; + tmp_dist_sum = dist_sum; } } - vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum); - cpi->rd_filter_cache[i] = RDCOST(x->rdmult, x->rddiv, - rate_sum, dist_sum); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], - cpi->rd_filter_cache[i] + rs_rd); - rd = cpi->rd_filter_cache[i]; - if (cm->mcomp_filter_type == SWITCHABLE) - rd += rs_rd; - if (!interpolating_intpel_seen && is_intpel_interp) { - tmp_rate_sum = rate_sum; - tmp_dist_sum = dist_sum; - } - } - if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { - if (rd / 2 > ref_best_rd) { - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; + if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { + if (rd / 2 > ref_best_rd) { + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = orig_dst[i]; + xd->plane[i].dst.stride = orig_dst_stride[i]; + } + return INT64_MAX; } - return INT64_MAX; } - } - newbest = i == 0 || rd < best_rd; - - if (newbest) { - best_rd = rd; - *best_filter = mbmi->interp_filter; - if (cm->mcomp_filter_type == SWITCHABLE && i && - !(interpolating_intpel_seen && is_intpel_interp)) - best_needs_copy = !best_needs_copy; - } + newbest = i == 0 || rd < best_rd; + + if (newbest) { + best_rd = rd; + *best_filter = mbmi->interp_filter; + if (cm->mcomp_filter_type == SWITCHABLE && i && !intpel_mv) + best_needs_copy = !best_needs_copy; + } - if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || - (cm->mcomp_filter_type != SWITCHABLE && - cm->mcomp_filter_type == mbmi->interp_filter)) { - pred_exists = 1; + if ((cm->mcomp_filter_type == SWITCHABLE && newbest) || + (cm->mcomp_filter_type != SWITCHABLE && + cm->mcomp_filter_type == mbmi->interp_filter)) { + pred_exists = 1; + } } - interpolating_intpel_seen |= is_intpel_interp; - } - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].dst.buf = orig_dst[i]; - xd->plane[i].dst.stride = orig_dst_stride[i]; + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].dst.buf = orig_dst[i]; + xd->plane[i].dst.stride = orig_dst_stride[i]; + } } } // Set the appropriate filter @@ -3486,66 +3484,76 @@ int64_t vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, xd->mode_info_context->mbmi.txfm_size = TX_4X4; cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = INT64_MAX; - for (switchable_filter_index = 0; - switchable_filter_index < VP9_SWITCHABLE_FILTERS; - ++switchable_filter_index) { - int newbest, rs; - int64_t rs_rd; - mbmi->interp_filter = switchable_filter_index; - vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); - - tmp_rd = rd_pick_best_mbsegmentation(cpi, x, - &mbmi->ref_mvs[ref_frame][0], - second_ref, - best_yrd, - &rate, &rate_y, &distortion, - &skippable, &total_sse, - (int)this_rd_thresh, seg_mvs, - bsi, switchable_filter_index, - mi_row, mi_col); - if (tmp_rd == INT64_MAX) - continue; - - cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; - rs = get_switchable_rate(x); - rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = - MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], tmp_rd + rs_rd); - if (cm->mcomp_filter_type == SWITCHABLE) - tmp_rd += rs_rd; - - newbest = (tmp_rd < tmp_best_rd); - if (newbest) { - tmp_best_filter = mbmi->interp_filter; - tmp_best_rd = tmp_rd; - } - if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || - (mbmi->interp_filter == cm->mcomp_filter_type && - cm->mcomp_filter_type != SWITCHABLE)) { - tmp_best_rdu = tmp_rd; - tmp_best_rate = rate; - tmp_best_ratey = rate_y; - tmp_best_distortion = distortion; - tmp_best_sse = total_sse; - tmp_best_skippable = skippable; - tmp_best_mbmode = *mbmi; - tmp_best_partition = *x->partition_info; - for (i = 0; i < 4; i++) - tmp_best_bmodes[i] = xd->mode_info_context->bmi[i]; - pred_exists = 1; - if (switchable_filter_index == 0 && - cpi->sf.use_rd_breakout && - best_rd < INT64_MAX) { - if (tmp_best_rdu / 2 > best_rd) { - // skip searching the other filters if the first is - // already substantially larger than the best so far + if (cm->mcomp_filter_type != BILINEAR) { + tmp_best_filter = EIGHTTAP; + if (x->source_variance < + cpi->sf.disable_filter_search_var_thresh) { + tmp_best_filter = EIGHTTAP; + vp9_zero(cpi->rd_filter_cache); + } else { + for (switchable_filter_index = 0; + switchable_filter_index < VP9_SWITCHABLE_FILTERS; + ++switchable_filter_index) { + int newbest, rs; + int64_t rs_rd; + mbmi->interp_filter = switchable_filter_index; + vp9_setup_interp_filters(xd, mbmi->interp_filter, &cpi->common); + + tmp_rd = rd_pick_best_mbsegmentation(cpi, x, + &mbmi->ref_mvs[ref_frame][0], + second_ref, + best_yrd, + &rate, &rate_y, &distortion, + &skippable, &total_sse, + (int)this_rd_thresh, seg_mvs, + bsi, switchable_filter_index, + mi_row, mi_col); + + if (tmp_rd == INT64_MAX) + continue; + cpi->rd_filter_cache[switchable_filter_index] = tmp_rd; + rs = get_switchable_rate(x); + rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); + cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS] = + MIN(cpi->rd_filter_cache[VP9_SWITCHABLE_FILTERS], + tmp_rd + rs_rd); + if (cm->mcomp_filter_type == SWITCHABLE) + tmp_rd += rs_rd; + + newbest = (tmp_rd < tmp_best_rd); + if (newbest) { tmp_best_filter = mbmi->interp_filter; - tmp_best_rdu = INT64_MAX; - break; + tmp_best_rd = tmp_rd; } - } + if ((newbest && cm->mcomp_filter_type == SWITCHABLE) || + (mbmi->interp_filter == cm->mcomp_filter_type && + cm->mcomp_filter_type != SWITCHABLE)) { + tmp_best_rdu = tmp_rd; + tmp_best_rate = rate; + tmp_best_ratey = rate_y; + tmp_best_distortion = distortion; + tmp_best_sse = total_sse; + tmp_best_skippable = skippable; + tmp_best_mbmode = *mbmi; + tmp_best_partition = *x->partition_info; + for (i = 0; i < 4; i++) + tmp_best_bmodes[i] = xd->mode_info_context->bmi[i]; + pred_exists = 1; + if (switchable_filter_index == 0 && + cpi->sf.use_rd_breakout && + best_rd < INT64_MAX) { + if (tmp_best_rdu / 2 > best_rd) { + // skip searching the other filters if the first is + // already substantially larger than the best so far + tmp_best_filter = mbmi->interp_filter; + tmp_best_rdu = INT64_MAX; + break; + } + } + } + } // switchable_filter_index loop } - } // switchable_filter_index loop + } if (tmp_best_rdu == INT64_MAX) continue; -- 2.40.0