From 3b5a90bd868a3469659e4cb78ec37032863cb92d Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 12 Dec 2013 17:31:04 -0800 Subject: [PATCH] Enable adaptive pred filter type for sub8x8 This commit enables adaptive prediction filter type selection for sub8x8 block sizes. In speed 1, sub8x8 blocks re-use the filter type of the collocated 8x8 block if that block was tested in the rate-distortion optimization loop. Otherwise, the normal test over all three filter types is run. In speed 2, sub8x8 blocks re-use the 8x8 block's prediction filter type, if available. Otherwise, the filter type is forced to EIGHTTAP. Compression and speed performance: speed 1 derf -0.266% yt -0.138% bus at 2000 kbps: 33766ms -> 30451ms (10% speed-up) football at 600 kbps: 48173ms -> 43786ms (9% speed-up) speed 2 derf -0.026% yt +0.134% bus at 2000 kbps: 18973ms -> 17698ms (6% speed-up) football at 600 kbps: 26748ms -> 25096ms (6% speed-up) Change-Id: I77e097533b969fd3472147225fa79fc98095d342 --- vp9/encoder/vp9_block.h | 1 + vp9/encoder/vp9_encodeframe.c | 32 ++++++++++++++++++++++++++++++++ vp9/encoder/vp9_onyx_if.c | 5 +++++ vp9/encoder/vp9_onyx_int.h | 1 + vp9/encoder/vp9_rdopt.c | 10 +++++++++- 5 files changed, 48 insertions(+), 1 deletion(-) diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index 00883385e..737fad4c2 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -59,6 +59,7 @@ typedef struct { // motion vector cache for adaptive motion search control in partition // search loop int_mv pred_mv[MAX_REF_FRAMES]; + int pred_filter_type; // Bit flag for each mode whether it has high error in comparison to others. 
unsigned int modes_with_high_error; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 21cace6e8..5f9d0c94c 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1686,6 +1686,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = i; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); + if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + partition_none_allowed) + get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, bsize)->mic.mbmi.interp_filter; rd_pick_partition(cpi, tile, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rate, &this_dist, i != 3, best_rd - sum_rd); @@ -1733,6 +1737,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); + if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + partition_none_allowed) + get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, bsize)->mic.mbmi.interp_filter; pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -1744,6 +1752,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); + if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + partition_none_allowed) + get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, bsize)->mic.mbmi.interp_filter; pick_sb_modes(cpi, tile, mi_row + ms, mi_col, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); @@ -1778,6 +1790,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 0; if 
(cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); + if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + partition_none_allowed) + get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, bsize)->mic.mbmi.interp_filter; pick_sb_modes(cpi, tile, mi_row, mi_col, &sum_rate, &sum_dist, subsize, get_block_context(x, subsize), best_rd); sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); @@ -1788,6 +1804,10 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, *get_sb_index(x, subsize) = 1; if (cpi->sf.adaptive_motion_search) load_pred_mv(x, get_block_context(x, bsize)); + if (cpi->sf.adaptive_pred_filter_type && bsize == BLOCK_8X8 && + partition_none_allowed) + get_block_context(x, subsize)->pred_filter_type = + get_block_context(x, bsize)->mic.mbmi.interp_filter; pick_sb_modes(cpi, tile, mi_row, mi_col + ms, &this_rate, &this_dist, subsize, get_block_context(x, subsize), best_rd - sum_rd); @@ -1889,6 +1909,18 @@ static void encode_sb_row(VP9_COMP *cpi, const TileInfo *const tile, int dummy_rate; int64_t dummy_dist; + BLOCK_SIZE i; + MACROBLOCK *x = &cpi->mb; + for (i = BLOCK_4X4; i < BLOCK_8X8; ++i) { + const int num_4x4_w = num_4x4_blocks_wide_lookup[i]; + const int num_4x4_h = num_4x4_blocks_high_lookup[i]; + const int num_4x4_blk = MAX(4, num_4x4_w * num_4x4_h); + for (x->sb_index = 0; x->sb_index < 4; ++x->sb_index) + for (x->mb_index = 0; x->mb_index < 4; ++x->mb_index) + for (x->b_index = 0; x->b_index < 16 / num_4x4_blk; ++x->b_index) + get_block_context(x, i)->pred_filter_type = SWITCHABLE; + } + vp9_zero(cpi->mb.pred_mv); if (cpi->sf.reference_masking) diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index 578fb421c..e010c0917 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -654,6 +654,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; + 
sf->adaptive_pred_filter_type = 0; sf->use_avoid_tested_higherror = 0; sf->reference_masking = 0; sf->use_one_partition_size_always = 0; @@ -717,6 +718,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 1; sf->auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->recode_loop = 2; @@ -744,6 +746,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 16; @@ -779,6 +782,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 16; @@ -812,6 +816,7 @@ void vp9_set_speed_features(VP9_COMP *cpi) { sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; + sf->adaptive_pred_filter_type = 2; sf->auto_mv_step_size = 1; sf->disable_filter_search_var_thresh = 16; diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index 13ab75330..8f2ffc989 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -270,6 +270,7 @@ typedef struct { int using_small_partition_info; // TODO(jingning): combine the related motion search speed features int adaptive_motion_search; + int adaptive_pred_filter_type; // Implements various heuristics to skip searching modes // The heuristics selected are based on flags diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 9bade9849..cdba1e867 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -4075,6 +4075,14 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, cpi->sf.disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; vp9_zero(cpi->rd_filter_cache); + } else if (cpi->sf.adaptive_pred_filter_type == 1 && + ctx->pred_filter_type < SWITCHABLE) { + tmp_best_filter = 
ctx->pred_filter_type; + vp9_zero(cpi->rd_filter_cache); + } else if (cpi->sf.adaptive_pred_filter_type == 2) { + tmp_best_filter = ctx->pred_filter_type < SWITCHABLE ? + ctx->pred_filter_type : 0; + vp9_zero(cpi->rd_filter_cache); } else { for (switchable_filter_index = 0; switchable_filter_index < SWITCHABLE_FILTERS; @@ -4141,7 +4149,7 @@ int64_t vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, } } - if (tmp_best_rdu == INT64_MAX) + if (tmp_best_rdu == INT64_MAX && pred_exists) continue; mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE ? -- 2.40.0