From 7195ded2c5a8be2c343ce0372d6bc4d2c8389962 Mon Sep 17 00:00:00 2001 From: Marco Paniconi Date: Mon, 19 Nov 2018 14:13:48 -0800 Subject: [PATCH] vp9-svc: Reset temporal layers on scene change Reuse existing function for resetting temporal layer pattern. And fix to use first spatial layer to encode, and some refactoring in encode_without_recode_loop(). Change-Id: Ifb22bb9de793ecb8e73f410e125c7c12383da1d2 --- vp9/encoder/vp9_encoder.c | 77 +++++++++++++++++------------- vp9/encoder/vp9_ratectrl.c | 8 ++-- vp9/encoder/vp9_svc_layercontext.c | 7 +-- vp9/encoder/vp9_svc_layercontext.h | 2 +- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 976ba020d..7ced82186 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3700,15 +3700,16 @@ static INLINE void set_raw_source_frame(VP9_COMP *cpi) { static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest) { VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; int q = 0, bottom_index = 0, top_index = 0; int no_drop_scene_change = 0; const INTERP_FILTER filter_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_type[svc->spatial_layer_id] : EIGHTTAP; const int phase_scaler = (is_one_pass_cbr_svc(cpi)) - ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] + ? svc->downsample_filter_phase[svc->spatial_layer_id] : 0; if (cm->show_existing_frame) { @@ -3716,8 +3717,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, return 1; } - cpi->svc.time_stamp_prev[cpi->svc.spatial_layer_id] = - cpi->svc.time_stamp_superframe; + svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe; // Flag to check if its valid to compute the source sad (used for // scene detection and for superblock content state in CBR mode). @@ -3731,25 +3731,25 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 2 && cpi->un_scaled_source->y_height == cm->height << 2 && - cpi->svc.scaled_temp.y_width == cm->width << 1 && - cpi->svc.scaled_temp.y_height == cm->height << 1) { + svc->scaled_temp.y_width == cm->width << 1 && + svc->scaled_temp.y_height == cm->height << 1) { // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2 // result will be saved in scaled_temp and might be used later. - const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1]; - const int phase_scaler2 = cpi->svc.downsample_filter_phase[1]; + const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1]; + const int phase_scaler2 = svc->downsample_filter_phase[1]; cpi->Source = vp9_svc_twostage_scale( - cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp, + cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp, filter_scaler, phase_scaler, filter_scaler2, phase_scaler2); - cpi->svc.scaled_one_half = 1; + svc->scaled_one_half = 1; } else if (is_one_pass_cbr_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 1 && cpi->un_scaled_source->y_height == cm->height << 1 && - cpi->svc.scaled_one_half) { + svc->scaled_one_half) { // If the spatial layer is 1/2x1/2 and the scaling is already done in the // two-stage scaling, use the result directly. - cpi->Source = &cpi->svc.scaled_temp; - cpi->svc.scaled_one_half = 0; + cpi->Source = &svc->scaled_temp; + svc->scaled_one_half = 0; } else { cpi->Source = vp9_scale_if_required( cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0), @@ -3757,8 +3757,8 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } #ifdef OUTPUT_YUV_SVC_SRC // Write out at most 3 spatial layers. - if (is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id < 3) { - vpx_write_yuv_frame(yuv_svc_src[cpi->svc.spatial_layer_id], cpi->Source); + if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) { + vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source); } #endif // Unfiltered raw source used in metrics calculation if the source @@ -3778,9 +3778,9 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } if ((cpi->use_svc && - (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 || - cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 || - cpi->svc.current_superframe < 1)) || + (svc->spatial_layer_id < svc->number_spatial_layers - 1 || + svc->temporal_layer_id < svc->number_temporal_layers - 1 || + svc->current_superframe < 1)) || cpi->resize_pending || cpi->resize_state || cpi->external_resize || cpi->resize_state != ORIG) { cpi->compute_source_sad_onepass = 0; @@ -3829,20 +3829,31 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); - if (cpi->svc.spatial_layer_id == 0) - cpi->svc.high_source_sad_superframe = cpi->rc.high_source_sad; + if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { + svc->high_source_sad_superframe = cpi->rc.high_source_sad; + // On scene change reset temporal layer pattern to TL0. + // TODO(marpan/jianj): Fix this to handle case where base + // spatial layers are skipped, in which case we should insert + // and reset to spatial layer 0 on scene change. + if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0) { + // rc->high_source_sad will get reset so copy it to restore it. + int tmp_high_source_sad = cpi->rc.high_source_sad; + vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME); + cpi->rc.high_source_sad = tmp_high_source_sad; + } + } // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame, if base layer is key for svc, // on scene change, or if superframe has layer sync. - if ((cpi->rc.high_source_sad || cpi->svc.high_source_sad_superframe) && - !(cpi->rc.use_post_encode_drop && cpi->svc.last_layer_dropped[0])) + if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) && + !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0])) no_drop_scene_change = 1; if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && !frame_is_intra_only(cm) && !no_drop_scene_change && - !cpi->svc.superframe_has_layer_sync && + !svc->superframe_has_layer_sync && (!cpi->use_svc || - !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { if (vp9_rc_drop_frame(cpi)) return 0; } @@ -3850,7 +3861,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). if (frame_is_intra_only(cm) == 0 && - !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) { + !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) { vp9_scale_references(cpi); } @@ -3860,12 +3871,12 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) { - if (cpi->svc.prev_partition_svc == NULL) { + svc->spatial_layer_id == svc->number_spatial_layers - 2) { + if (svc->prev_partition_svc == NULL) { CHECK_MEM_ERROR( - cm, cpi->svc.prev_partition_svc, + cm, svc->prev_partition_svc, (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, - sizeof(*cpi->svc.prev_partition_svc))); + sizeof(*svc->prev_partition_svc))); } } @@ -3893,13 +3904,13 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (cpi->use_svc) { // On non-zero spatial layer, check for disabling inter-layer // prediction. - if (cpi->svc.spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); + if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi); vp9_svc_assert_constraints_pattern(cpi); } if (cpi->rc.last_post_encode_dropped_scene_change) { cpi->rc.high_source_sad = 1; - cpi->svc.high_source_sad_superframe = 1; + svc->high_source_sad_superframe = 1; // For now disable use_source_sad since Last_Source will not be the previous // encoded but the dropped one. cpi->sf.use_source_sad = 0; @@ -3910,7 +3921,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // control parameters. if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ && (cpi->rc.high_source_sad || - (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { + (cpi->use_svc && svc->high_source_sad_superframe))) { if (vp9_encodedframe_overshoot(cpi, -1, &q)) { vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); @@ -3945,7 +3956,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, // For SVC: all spatial layers are checked for re-encoding. if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ && (cpi->rc.high_source_sad || - (cpi->use_svc && cpi->svc.high_source_sad_superframe))) { + (cpi->use_svc && svc->high_source_sad_superframe))) { int frame_size = 0; // Get an estimate of the encoded frame size. save_coding_context(cpi); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index bb316fe11..779529b24 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -2122,7 +2122,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; if (is_one_pass_cbr_svc(cpi)) { - if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi); + if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1); layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, svc->number_temporal_layers); svc->layer_context[layer].is_key_frame = 1; @@ -2750,8 +2750,10 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) { #endif rc->high_source_sad = 0; rc->high_num_blocks_with_motion = 0; - if (cpi->svc.spatial_layer_id == 0 && src_width == last_src_width && - src_height == last_src_height) { + // For SVC: scene detection is only checked on first spatial layer of + // the superframe using the original/unscaled resolutions. + if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && + src_width == last_src_width && src_height == last_src_height) { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; int num_mi_cols = cm->mi_cols; int num_mi_rows = cm->mi_rows; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 7cab7ffb2..033684e2e 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -932,7 +932,7 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { } // Reset on key frame: reset counters, references and buffer updates. -void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { +void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) { int sl, tl; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; @@ -940,7 +940,7 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) { lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl]; lc->current_video_frame_in_layer = 0; - lc->frames_from_key_frame = 0; + if (is_key) lc->frames_from_key_frame = 0; } } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { @@ -1108,7 +1108,8 @@ void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) { if (svc->spatial_layer_id == 0) { // On base spatial layer: if the current superframe has a layer sync then // reset the pattern counters and reset to base temporal layer. - if (svc->superframe_has_layer_sync) vp9_svc_reset_key_frame(cpi); + if (svc->superframe_has_layer_sync) + vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME); } // If the layer sync is set for this current spatial layer then // disable the temporal reference. diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index a114a9188..17d7a8a50 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -237,7 +237,7 @@ int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); -void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi); +void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key); void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); -- 2.40.0