From 97848890a99af6604df13ec38c094a01aea40f0e Mon Sep 17 00:00:00 2001 From: Marco Paniconi Date: Wed, 8 Aug 2018 14:01:26 -0700 Subject: [PATCH] vp9: Allow for overshoot detection for non-screen CBR mode. For CBR real-time mode: refactor usage of speed feature to handle overshoot on slide/scene change. Add 2 modes to indicate how slide/scene change is processed for re-setting Q/rate control. Keep the speed setting to 1 for speed >= 5, otherwise set to 0. Video content and screen content are now handled in similar way, though with different thresholds. Some fixes to thresholds and reset: correct the reset of the buffer level to optimal level for each temporal layer, if scene change frame will be encoded at max_q. Also increase the min_thresh for video mode (non-screen content): this is to avoid scene change detection on cases like large lighting changes, cameras focus. And increase in min_thresh makes it more robust to sudden increase in noise level. Change-Id: I256d350da6e92d2ddc09f100fc06ac147cbc1e49 --- vp9/encoder/vp9_encoder.c | 16 ++++++++-------- vp9/encoder/vp9_ratectrl.c | 26 +++++++++++++------------- vp9/encoder/vp9_speed_features.c | 11 ++++------- vp9/encoder/vp9_speed_features.h | 14 +++++++++++--- 4 files changed, 36 insertions(+), 31 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 3db11fcb0..335a0b292 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -4052,13 +4052,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, vp9_svc_assert_constraints_pattern(cpi); } - if (!cpi->sf.re_encode_overshoot_rt && - cpi->oxcf.content == VP9E_CONTENT_SCREEN && + // Check if this high_source_sad (scene/slide change) frame should be + // encoded at high/max QP, and if so, set the q and adjust some rate + // control parameters. 
+ if (cpi->sf.overshoot_detection_rt == 1 && (cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { - // Check if this high_source_sad (scene/slide change) frame should be - // encoded at high/max QP, and if so, set the q and adjust some rate - // control parameters. if (vp9_encodedframe_overshoot(cpi, -1, &q)) { vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); @@ -4087,10 +4086,11 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, vp9_encode_frame(cpi); - // Check if we should drop this frame because of high overshoot. - // Only for frames where high temporal-source SAD is detected. + // Check if we should re-encode this frame at high Q because of high + // overshoot based on the encoded frame size. Only for frames where + // high temporal-source SAD is detected. // For SVC: all spatial layers are checked for re-encoding. - if (cpi->sf.re_encode_overshoot_rt && + if (cpi->sf.overshoot_detection_rt == 2 && (cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { int frame_size = 0; diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index e0c877833..64db18f09 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -2648,10 +2648,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { float thresh = 8.0f; uint32_t thresh_key = 140000; if (cpi->oxcf.speed <= 5) thresh_key = 240000; - if (cpi->oxcf.rc_mode == VPX_VBR) { - min_thresh = 65000; - thresh = 2.1f; - } + if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000; + if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f; if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) { const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2); @@ -2822,14 +2820,16 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { SPEED_FEATURES *const sf = &cpi->sf; int thresh_qp = 7 * 
(rc->worst_quality >> 3); int thresh_rate = rc->avg_frame_bandwidth << 3; - // Lower rate threshold for video. + // Lower thresh_qp for video (more overshoot at lower Q) to be + // more conservative for video. if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) - thresh_rate = rc->avg_frame_bandwidth << 2; + thresh_qp = rc->worst_quality >> 1; // If this decision is not based on an encoded frame size but just on - // scene/slide change detection (i.e., re_encode_overshoot_rt = 0), adjust the - // qp_thresh and skip the (frame_size > thresh_rate) condition in this case. - if (!sf->re_encode_overshoot_rt) thresh_qp = 3 * (rc->worst_quality >> 2); - if ((!sf->re_encode_overshoot_rt || frame_size > thresh_rate) && + // scene/slide change detection (i.e., overshoot_detection_rt = 1), + // for now skip the (frame_size > thresh_rate) condition in this case. + // TODO(marpan): Use a better size/rate condition for this case and + // adjust thresholds. + if ((sf->overshoot_detection_rt == 1 || frame_size > thresh_rate) && cm->base_qindex < thresh_qp) { double rate_correction_factor = cpi->rc.rate_correction_factors[INTER_NORMAL]; @@ -2846,7 +2846,7 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { // and the encoded frame used alot of Intra modes, then force hybrid_intra // encoding for the re-encode on this scene change. hybrid_intra will // use rd-based intra mode selection for small blocks. 
- if (sf->re_encode_overshoot_rt && frame_size > (thresh_rate << 1) && + if (sf->overshoot_detection_rt == 2 && frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) { MODE_INFO **mi = cm->mi_grid_visible; int sum_intra_usage = 0; @@ -2900,8 +2900,8 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) { LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; lrc->avg_frame_qindex[INTER_FRAME] = *q; - lrc->buffer_level = rc->optimal_buffer_level; - lrc->bits_off_target = rc->optimal_buffer_level; + lrc->buffer_level = lrc->optimal_buffer_level; + lrc->bits_off_target = lrc->optimal_buffer_level; lrc->rc_1_frame = 0; lrc->rc_2_frame = 0; lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor; diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 859e626bb..107da2194 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -405,7 +405,7 @@ static void set_rt_speed_feature_framesize_independent( sf->use_compound_nonrd_pickmode = 0; sf->nonrd_keyframe = 0; sf->svc_use_lowres_part = 0; - sf->re_encode_overshoot_rt = 0; + sf->overshoot_detection_rt = 0; sf->disable_16x16part_nonkey = 0; sf->disable_golden_ref = 0; sf->enable_tpl_model = 0; @@ -570,11 +570,9 @@ static void set_rt_speed_feature_framesize_independent( // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent // increase in encoding time. 
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) sf->nonrd_keyframe = 1; - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && - cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && - (cpi->use_svc || cpi->oxcf.content == VP9E_CONTENT_SCREEN)) { - sf->re_encode_overshoot_rt = 1; - } + if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && + cpi->oxcf.rc_mode == VPX_CBR) + sf->overshoot_detection_rt = 1; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && cm->width <= 1280 && cm->height <= 720) { sf->use_altref_onepass = 1; @@ -583,7 +581,6 @@ static void set_rt_speed_feature_framesize_independent( } if (speed >= 6) { - sf->re_encode_overshoot_rt = 0; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) { sf->use_altref_onepass = 1; sf->use_compound_nonrd_pickmode = 1; diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h index 89fc82ebf..7430f0a33 100644 --- a/vp9/encoder/vp9_speed_features.h +++ b/vp9/encoder/vp9_speed_features.h @@ -542,9 +542,17 @@ typedef struct SPEED_FEATURES { // For SVC: enables use of partition from lower spatial resolution. int svc_use_lowres_part; - // Enable re-encoding on scene change with potential high overshoot, - // for real-time encoding flow. - int re_encode_overshoot_rt; + // Flag to indicate process for handling overshoot on slide/scene change, + // for real-time CBR mode. + // 0: no reaction to rate control on a detected slide/scene change + // (prior to encoding the frame). + // 1: set to larger Q based only on the detected slide/scene change + // and current/past Q. No second pass encoding, so faster than option 2. + // 2: based on (first pass) encoded frame, if large frame size is detected + // then set to higher Q for second encode. This involves 2 pass encoding + // on slide change, so slower than 1, but more accurate for detecting + // overshoot. + int overshoot_detection_rt; // Disable partitioning of 16x16 blocks. 
int disable_16x16part_nonkey; -- 2.40.0