From f058688eaa8b9fb2ff1f4d92eb02cf888fd28ec0 Mon Sep 17 00:00:00 2001 From: Jerome Jiang Date: Fri, 1 Jun 2018 14:27:34 -0700 Subject: [PATCH] vp9-svc: Allow usage of second (long term) temporal reference. Allow for second temporal reference for top spatial layer in SVC, when inter-layer prediction is disabled on INTER frames. The second temporal reference is labelled as the golden reference and the update/refresh of this reference buffer is only on base temporal layer superframes. For now the period of refresh is fixed at every 20 TL0 superframes. Average gain is ~4% on RTC set, several clips up by ~8-12%. Speed loss is about ~2% on mac. Feature is disabled as default for now. Change-Id: I2e5db5052c62dbe958a3b14be97d043823b7a529 --- vp9/encoder/vp9_pickmode.c | 19 +++++++++----- vp9/encoder/vp9_ratectrl.c | 40 ++++++++++++++++++++++++++--- vp9/encoder/vp9_svc_layercontext.c | 41 +++++++++++++++++++++++++++++- vp9/encoder/vp9_svc_layercontext.h | 5 ++++ 4 files changed, 95 insertions(+), 10 deletions(-) diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 45c96fdc5..66810ae97 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1497,6 +1497,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int skip_ref_find_pred[4] = { 0 }; unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; + int gf_is_longterm_ref = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; @@ -1538,6 +1539,11 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, cm->base_qindex < svc->lower_layer_qindex - 20) thresh_svc_skip_golden = 1000; + if (!cpi->use_svc || + (svc->use_longterm_ref_current_layer && + !svc->layer_context[svc->temporal_layer_id].is_key_frame)) + gf_is_longterm_ref = 1; + init_ref_frame_cost(cm, xd, ref_frame_cost); memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES); @@ 
-1610,7 +1616,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #endif - if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc && + if (cpi->rc.frames_since_golden == 0 && gf_is_longterm_ref && !cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) { usable_ref_frame = LAST_FRAME; } else { @@ -1637,7 +1643,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For svc mode, on spatial_layer_id > 0: if the reference has different scale // constrain the inter mode to only test zero motion. if (cpi->use_svc && svc->force_zero_mode_spatial_ref && - svc->spatial_layer_id > 0) { + svc->spatial_layer_id > 0 && !gf_is_longterm_ref) { if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; if (vp9_is_scaled(sf)) { @@ -1716,7 +1722,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // The nonzero motion is half pixel shifted to left and top (-4, -4). if (cpi->use_svc && svc->spatial_layer_id > 0 && svc_force_zero_mode[inter_layer_ref - 1] && - svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8) { + svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 && + !gf_is_longterm_ref) { svc_mv_col = -4; svc_mv_row = -4; flag_svc_subpel = 1; @@ -1789,7 +1796,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // For SVC, skip the golden (spatial) reference search if sse of zeromv_last // is below threshold. 
- if (cpi->use_svc && ref_frame == GOLDEN_FRAME && + if (cpi->use_svc && ref_frame == GOLDEN_FRAME && !gf_is_longterm_ref && sse_zeromv_normalized < thresh_svc_skip_golden) continue; @@ -1909,7 +1916,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, if (frame_mv[this_mode][ref_frame].as_int != 0) continue; if (this_mode == NEWMV && !force_mv_inter_layer) { - if (ref_frame > LAST_FRAME && !cpi->use_svc && + if (ref_frame > LAST_FRAME && gf_is_longterm_ref && cpi->oxcf.rc_mode == VPX_CBR) { int tmp_sad; uint32_t dis; @@ -2277,7 +2284,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, // layer is chosen as the reference. Always perform intra prediction if // LAST is the only reference, or is_key_frame is set, or on base // temporal layer. - if (svc->spatial_layer_id) { + if (svc->spatial_layer_id && !gf_is_longterm_ref) { perform_intra_pred = svc->temporal_layer_id == 0 || svc->layer_context[svc->temporal_layer_id].is_key_frame || diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 9e361e768..198f3fd56 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -1597,6 +1597,18 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { update_golden_frame_stats(cpi); } + // If second (long term) temporal reference is used for SVC, + // update the golden frame counter, only for base temporal layer. 
+ if (cpi->use_svc && cpi->svc.use_longterm_ref_current_layer && + cpi->svc.temporal_layer_id == 0) { + if (cpi->refresh_golden_frame) + rc->frames_since_golden = 0; + else + rc->frames_since_golden++; + // Decrement count down till next gf + if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--; + } + if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0; if (cm->show_frame) { rc->frames_since_key++; @@ -1861,15 +1873,37 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { target = calc_pframe_target_size_one_pass_cbr(cpi); } } - + // If long term temporal feature is enabled, set the period of the update. + // The update/refresh of this reference frame is always on base temporal + // layer frame. + if (cpi->svc.use_longterm_ref_current_layer && + cpi->svc.temporal_layer_id == 0) { + if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) { + // On key frame we update the buffer index used for long term reference. + // Use the alt_ref since it is not used or updated on key frames. + cpi->ext_refresh_alt_ref_frame = 1; + cpi->alt_fb_idx = cpi->svc.buffer_idx_longterm_ref; + } else if (rc->frames_till_gf_update_due == 0) { + // Set period of next update. Make it a multiple of 10, as the cyclic + // refresh is typically ~10%, and we'd like the update to happen after + // a few cycles of the refresh (for a better quality frame). Note the + // cyclic refresh for SVC only operates on base temporal layer frames. + // Choose 20 as period for now (2 cycles). + rc->baseline_gf_interval = 20; + rc->frames_till_gf_update_due = rc->baseline_gf_interval; + cpi->ext_refresh_golden_frame = 1; + rc->gfu_boost = DEFAULT_GF_BOOST; + } + } else if (!cpi->svc.use_longterm_ref) { + rc->frames_till_gf_update_due = INT_MAX; + rc->baseline_gf_interval = INT_MAX; + } // Any update/change of global cyclic refresh parameters (amount/delta-qp) // should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_parameters(cpi); vp9_rc_set_frame_target(cpi, target); - rc->frames_till_gf_update_due = INT_MAX; - rc->baseline_gf_interval = INT_MAX; } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index d745ae0df..1582b7b99 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -33,6 +33,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->force_zero_mode_spatial_ref = 0; svc->use_base_mv = 0; svc->use_partition_reuse = 0; + svc->use_longterm_ref = 0; + svc->use_longterm_ref_current_layer = 0; svc->scaled_temp_is_alloc = 0; svc->scaled_one_half = 0; svc->current_superframe = 0; @@ -709,6 +711,34 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { } } + // For the fixed (non-flexible/bypass) SVC mode: + // If long term temporal reference is enabled at the sequence level + // (use_longterm_ref == 1), and inter_layer is disabled (on inter-frames), + // we can use golden as a second temporal reference + // (since the spatial/inter-layer reference is disabled). + // To be safe we use fb_index 7 for this, since for 3-3 layer system slot 7 + // should be free/un-used. For now usage of this second temporal reference + // will only be used for highest spatial layer. + cpi->svc.use_longterm_ref_current_layer = 0; + cpi->svc.buffer_idx_longterm_ref = 7; + if (cpi->svc.use_longterm_ref && + cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + cpi->svc.disable_inter_layer_pred != INTER_LAYER_PRED_ON && + cpi->svc.number_spatial_layers <= 3 && + cpi->svc.number_temporal_layers <= 3 && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { + // Enable the second (long-term) temporal reference at the frame-level. + cpi->svc.use_longterm_ref_current_layer = 1; + // Only used for prediction on non-key superframes.
+ if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) { + // Use golden for this reference which will be used for prediction. + cpi->gld_fb_idx = cpi->svc.buffer_idx_longterm_ref; + // Enable prediction off LAST (last reference) and golden (which will + // generally be further behind/long-term reference). + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + // Reset the drop flags for all spatial layers, on the base layer. if (cpi->svc.spatial_layer_id == 0) { vp9_zero(cpi->svc.drop_spatial_layer); @@ -955,7 +985,7 @@ void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON && svc->framedrop_mode != LAYER_DROP) { - if (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) { + if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { // On non-key frames: LAST is always temporal reference, GOLDEN is // spatial reference. if (svc->temporal_layer_id == 0) @@ -988,5 +1018,14 @@ void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { svc->temporal_layer_id); } } + } else if (svc->use_longterm_ref_current_layer && + !svc->layer_context[svc->temporal_layer_id].is_key_frame) { + // If the usage of golden as second long term reference is enabled for this + // layer, then temporal_layer_id of that reference must be base temporal + // layer 0, and spatial_layer_id of that reference must be same as current + // spatial_layer_id. 
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == + svc->spatial_layer_id); + assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == 0); } } diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 9be5bb7ea..f1a1df936 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -97,6 +97,11 @@ typedef struct SVC { int gld_fb_idx[VPX_MAX_LAYERS]; int alt_fb_idx[VPX_MAX_LAYERS]; int force_zero_mode_spatial_ref; + // Sequence level flag to enable second (long term) temporal reference. + int use_longterm_ref; + // Frame level flag to enable second (long term) temporal reference. + int use_longterm_ref_current_layer; + int buffer_idx_longterm_ref; int current_superframe; int non_reference_frame; int use_base_mv; -- 2.40.0