From bb260d90764226c48d40f5b4fdeaa6a113cd9ba5 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 2 Oct 2014 17:49:00 -0700 Subject: [PATCH] Rework partition search skip scheme This commit enables the encoder to skip split partition search if the bigger block size has all non-zero quantized coefficients in low frequency area and the total rate cost is below a certain threshold. It logarithmically scales the rate threshold according to the current block size. For speed 3, the compression performance loss: derf -0.093% stdhd -0.066% Local experiments show 4% - 20% encoding speed-up for speed 3. blue_sky_1080p, 1500 kbps 51051 b/f, 35.891 dB, 67236 ms -> 50554 b/f, 35.857 dB, 59270 ms (12% speed-up) old_town_cross_720p, 1500 kbps 14431 b/f, 36.249 dB, 57687 ms -> 14108 b/f, 36.172 dB, 46586 ms (19% speed-up) pedestrian_area_1080p, 1500 kbps 50812 b/f, 40.124 dB, 100439 ms -> 50755 b/f, 40.118 dB, 96549 ms (4% speed-up) mobile_calendar_720p, 1000 kbps 10352 b/f, 35.055 dB, 51837 ms -> 10172 b/f, 35.003 dB, 44076 ms (15% speed-up) Change-Id: I412e34db49060775b3b89ba1738522317c3239c8 --- vp9/encoder/vp9_encodeframe.c | 2 ++ vp9/encoder/vp9_rdopt.c | 8 +++++++- vp9/encoder/vp9_speed_features.c | 17 ++++++++--------- vp9/encoder/vp9_tokenize.c | 18 ++++++++++++++++++ vp9/encoder/vp9_tokenize.h | 1 + 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 2100099ac..4c539d3a5 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -2267,6 +2267,8 @@ static void rd_pick_partition(VP9_COMP *cpi, const TileInfo *const tile, dist_breakout_thr >>= 8 - (b_width_log2(bsize) + b_height_log2(bsize)); + rate_breakout_thr *= num_pels_log2_lookup[bsize]; + // If all y, u, v transform blocks in this partition are skippable, and // the dist & rate are within the thresholds, the partition search is // terminated for current branch of the partition search tree. 
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index b14ff0822..7604b3a9e 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1948,11 +1948,17 @@ static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) { MACROBLOCKD *const xd = &x->e_mbd; + int plane, has_high_freq_coeff = 0; + BLOCK_SIZE bsize = xd->mi[0].src_mi->mbmi.sb_type; + + if (bsize >= BLOCK_8X8) + for (plane = 0; plane < MAX_MB_PLANE; ++plane) + has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); // Take a snapshot of the coding context so it can be // restored if we decide to encode this way ctx->skip = x->skip; - ctx->skippable = skippable; + ctx->skippable = skippable || !has_high_freq_coeff; ctx->best_mode_index = mode_index; ctx->mic = *xd->mi[0].src_mi; ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index d392de64a..062da09a0 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -59,7 +59,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->partition_search_breakout_dist_thr = (1 << 23); else sf->partition_search_breakout_dist_thr = (1 << 21); - sf->partition_search_breakout_rate_thr = 500; + sf->partition_search_breakout_rate_thr = 80; } if (speed >= 2) { @@ -70,8 +70,12 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->disable_split_mask = cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; + sf->partition_search_breakout_dist_thr = (1 << 24); + sf->partition_search_breakout_rate_thr = 120; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; + sf->partition_search_breakout_dist_thr = (1 << 22); + sf->partition_search_breakout_rate_thr = 100; } sf->reference_masking = 1; @@ -83,12 +87,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = CONSTRAIN_NEIGHBORING_MIN_MAX; - if (MIN(cm->width, cm->height) >= 720) - sf->partition_search_breakout_dist_thr = (1 << 24); - else - sf->partition_search_breakout_dist_thr = (1 << 22); - sf->partition_search_breakout_rate_thr = 700; - sf->allow_partition_search_skip = 1; } @@ -99,11 +97,13 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; sf->partition_search_breakout_dist_thr = (1 << 25); + sf->partition_search_breakout_rate_thr = 200; } else { sf->max_intra_bsize = BLOCK_32X32; sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; sf->schedule_mode_search = cm->base_qindex < 175 ? 
1 : 0; sf->partition_search_breakout_dist_thr = (1 << 23); + sf->partition_search_breakout_rate_thr = 120; } sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; sf->adaptive_pred_interp_filter = 0; @@ -117,7 +117,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; - sf->partition_search_breakout_rate_thr = 1000; } if (speed >= 4) { @@ -138,7 +137,7 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm, sf->partition_search_breakout_dist_thr = (1 << 26); else sf->partition_search_breakout_dist_thr = (1 << 24); - sf->partition_search_breakout_rate_thr = 1500; + sf->partition_search_breakout_rate_thr = 300; } if (speed >= 5) { diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 7b0a40684..adf01bf35 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -403,6 +403,24 @@ int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { return result; } +static void has_high_freq_coeff(int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *argv) { + struct is_skippable_args *args = argv; + int eobs = (tx_size == TX_4X4) ? 
3 : 10; + (void) plane_bsize; + + *(args->skippable) |= (args->x->plane[plane].eobs[block] > eobs); +} + +int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { + int result = 0; + struct is_skippable_args args = {x, &result}; + vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, + has_high_freq_coeff, &args); + return result; +} + void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index a657abc15..825252bac 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -49,6 +49,7 @@ extern const vp9_tree_index vp9_coef_con_tree[]; extern struct vp9_token vp9_coef_encodings[]; int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); +int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); struct VP9_COMP; -- 2.40.0