From 064fc570ff8399536563e3846500fd99b273b034 Mon Sep 17 00:00:00 2001 From: Marco Date: Fri, 28 Jul 2017 10:29:12 -0700 Subject: [PATCH] vp9: Speed feature to adapt partition based on source_sad. Move the source_sad feature to speed 6 (from speed 7), and add a speed feature to switch from the variance-based partition to reference_partition (which uses nonrd-pickmode for bsize selection) if source_sad is high. Currently used only at speed 6 for resolutions <= 360p. About 4-5% improvement on 360p in the RTC set. Encoding is somewhat slower, but still ~30% faster than speed 5. Change-Id: Ib0330ee5fe9fdd2608aed91359a2a339d967491c --- vp9/encoder/vp9_encodeframe.c | 12 +++++++----- vp9/encoder/vp9_speed_features.c | 22 +++++++++++++--------- vp9/encoder/vp9_speed_features.h | 4 ++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index d8115bc25..5220197fd 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -981,8 +981,8 @@ static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, } } -static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, - int sb_offset) { +static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, + int sb_offset) { unsigned int tmp_sse; uint64_t tmp_sad; unsigned int tmp_variance; @@ -994,7 +994,7 @@ static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, uint64_t avg_source_sad_threshold = 10000; uint64_t avg_source_sad_threshold2 = 12000; #if CONFIG_VP9_HIGHBITDEPTH - if (cpi->common.use_highbitdepth) return; + if (cpi->common.use_highbitdepth) return 0; #endif src_y += shift; last_src_y += shift; @@ -1025,7 +1025,7 @@ static void avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, cpi->content_state_sb_fd[sb_offset] = 0; } } - return; + return tmp_sad; } // This function chooses partitioning based on the variance between source and @@ -4167,7 +4167,9 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi,
ThreadData *td, if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); - avg_source_sad(cpi, x, shift, sb_offset2); + int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2); + if (sf->adapt_partition_source_sad && source_sad > 40000) + partition_search_type = REFERENCE_PARTITION; } // Set the partition type of the 64X64 block diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index d91142845..6488dfd53 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -364,6 +364,7 @@ static void set_rt_speed_feature_framesize_independent( sf->copy_partition_flag = 0; sf->use_source_sad = 0; sf->use_simple_block_yrd = 0; + sf->adapt_partition_source_sad = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; @@ -527,6 +528,17 @@ static void set_rt_speed_feature_framesize_independent( sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; + if (!cpi->external_resize) sf->use_source_sad = 1; + if (sf->use_source_sad) { + if (cm->width * cm->height <= 640 * 360) + sf->adapt_partition_source_sad = 1; + if (cpi->content_state_sb_fd == NULL && + (!cpi->use_svc || + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( + (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); + } + } if (cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // Enable short circuit for low temporal variance. 
sf->short_circuit_low_temp_var = 1; @@ -539,6 +551,7 @@ static void set_rt_speed_feature_framesize_independent( } if (speed >= 7) { + sf->adapt_partition_source_sad = 0; sf->adaptive_rd_thresh = 3; sf->mv.search_method = FAST_DIAMOND; sf->mv.fullpel_search_step_param = 10; @@ -552,15 +565,6 @@ static void set_rt_speed_feature_framesize_independent( if (cpi->svc.non_reference_frame) sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE; } - if (!cpi->external_resize) sf->use_source_sad = 1; - if (sf->use_source_sad) { - if (cpi->content_state_sb_fd == NULL && - (!cpi->use_svc || - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { - cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( - (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); - } - } // Enable partition copy. For SVC only enabled for top spatial resolution // layer. cpi->max_copied_frame = 0; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index ee485a35f..11e3154a0 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -490,6 +490,10 @@ typedef struct SPEED_FEATURES { int use_source_sad; int use_simple_block_yrd; + + // If source sad of superblock is high, will switch from VARIANCE_PARTITION to + // REFERENCE_PARTITION (which selects partition based on the nonrd-pickmode). + int adapt_partition_source_sad; } SPEED_FEATURES; struct VP9_COMP; -- 2.40.0