From: Jerome Jiang Date: Fri, 8 Dec 2017 18:38:18 +0000 (-0800) Subject: vp9 svc: Allow denoising next to highest resolution. X-Git-Tag: v1.7.0~26^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c1e511fd82fbb89ad37ab95116a04cddc90b4d49;p=libvpx vp9 svc: Allow denoising next to highest resolution. Denoise 2 spatial layers at most. Add noise sensitivity level 2 for vp9 such that applications can control whether to denoise the second highest spatial layer. Add tests to cover this case. Change-Id: Ic327d14b29adeba3f0dae547629f43b98d22997f --- diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c index e7b31f5eb..f5736ea45 100644 --- a/examples/vpx_temporal_svc_encoder.c +++ b/examples/vpx_temporal_svc_encoder.c @@ -32,13 +32,21 @@ static const char *exec_name; void usage_exit(void) { exit(EXIT_FAILURE); } -// Denoiser states, for temporal denoising. -enum denoiserState { - kDenoiserOff, - kDenoiserOnYOnly, - kDenoiserOnYUV, - kDenoiserOnYUVAggressive, - kDenoiserOnAdaptive +// Denoiser states for vp8, for temporal denoising. +enum denoiserStateVp8 { + kVp8DenoiserOff, + kVp8DenoiserOnYOnly, + kVp8DenoiserOnYUV, + kVp8DenoiserOnYUVAggressive, + kVp8DenoiserOnAdaptive +}; + +// Denoiser states for vp9, for temporal denoising. +enum denoiserStateVp9 { + kVp9DenoiserOff, + kVp9DenoiserOnYOnly, + // For SVC: denoise the top two spatial layers. 
+ kVp9DenoiserOnYTwoSpatialLayers }; static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 }; @@ -755,7 +763,7 @@ int main(int argc, char **argv) { if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); - vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0); #if VP8_ROI_MAP @@ -772,7 +780,7 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); - vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff); + vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); diff --git a/test/datarate_test.cc b/test/datarate_test.cc index 7ae761fd4..d8963f2f4 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -1449,24 +1449,29 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SL3TLDenoiserOn) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300); // TODO(marpan): Check that effective_datarate for each layer hits the // layer target_bitrate. 
- for (int i = 600; i <= 1000; i += 200) { - cfg_.rc_target_bitrate = i; - ResetModel(); - denoiser_on_ = 1; - assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, - cfg_.ts_number_layers, cfg_.temporal_layering_mode); - ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78) - << " The datarate for the file exceeds the target by too much!"; - ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) - << " The datarate for the file is lower than the target by too much!"; + // For SVC, noise_sen = 1 means denoising only the top spatial layer + // noise_sen = 2 means denoising the two top spatial layers. + for (int noise_sen = 1; noise_sen <= 2; noise_sen++) { + for (int i = 600; i <= 1000; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + denoiser_on_ = noise_sen; + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.78) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) + << " The datarate for the file is lower than the target by too much!"; #if CONFIG_VP9_DECODER - // Number of temporal layers > 1, so half of the frames in this SVC pattern - // will be non-reference frame and hence encoder will avoid loopfilter. - // Since frame dropper is off, we can expcet 150 (half of the sequence) - // mismatched frames. - EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames()); + // Number of temporal layers > 1, so half of the frames in this SVC + // pattern + // will be non-reference frame and hence encoder will avoid loopfilter. + // Since frame dropper is off, we can expect 150 (half of the sequence) + // mismatched frames. 
+ EXPECT_EQ(static_cast<unsigned int>(150), GetMismatchFrames()); #endif + } } } diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 70ddc7c99..b08ccaa66 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -189,11 +189,12 @@ static VP9_DENOISER_DECISION perform_motion_compensation( int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx, - int use_svc) { + int use_svc, int spatial_layer) { const int sse_diff = (ctx->newmv_sse == UINT_MAX) ? 0 : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse); int frame; + int denoise_layer_idx = 0; MACROBLOCKD *filter_mbd = &mb->e_mbd; MODE_INFO *mi = filter_mbd->mi[0]; MODE_INFO saved_mi; @@ -254,6 +255,10 @@ static VP9_DENOISER_DECISION perform_motion_compensation( frame = lst_fb_idx + 1; else if (frame == GOLDEN_FRAME) frame = gld_fb_idx + 1; + // Shift for the second spatial layer. 
+ if (num_spatial_layers - spatial_layer == 2) + frame = frame + denoiser->num_ref_frames; + denoise_layer_idx = num_spatial_layers - spatial_layer - 1; } if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { @@ -289,18 +294,21 @@ static VP9_DENOISER_DECISION perform_motion_compensation( denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col); filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride; - filter_mbd->plane[0].dst.buf = - block_start(denoiser->mc_running_avg_y.y_buffer, - denoiser->mc_running_avg_y.y_stride, mi_row, mi_col); - filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride; - filter_mbd->plane[1].dst.buf = - block_start(denoiser->mc_running_avg_y.u_buffer, - denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col); - filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride; - filter_mbd->plane[2].dst.buf = - block_start(denoiser->mc_running_avg_y.v_buffer, - denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col); - filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride; + filter_mbd->plane[0].dst.buf = block_start( + denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer, + denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col); + filter_mbd->plane[0].dst.stride = + denoiser->mc_running_avg_y[denoise_layer_idx].y_stride; + filter_mbd->plane[1].dst.buf = block_start( + denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer, + denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col); + filter_mbd->plane[1].dst.stride = + denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride; + filter_mbd->plane[2].dst.buf = block_start( + denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer, + denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col); + filter_mbd->plane[2].dst.stride = + denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride; set_ref_ptrs(cm, filter_mbd, saved_frame, NONE); vp9_build_inter_predictors_sby(filter_mbd, mi_row, 
mi_col, bs); @@ -324,9 +332,17 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, int zeromv_filter = 0; VP9_DENOISER *denoiser = &cpi->denoiser; VP9_DENOISER_DECISION decision = COPY_BLOCK; - YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME]; - YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y; + + const int shift = + cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 + ? denoiser->num_ref_frames + : 0; + YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift]; + const int denoise_layer_index = + cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1; + YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index]; uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); + uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; @@ -381,7 +397,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx, - cpi->gld_fb_idx, cpi->use_svc); + cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, @@ -432,7 +448,8 @@ void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int svc_base_is_key) { + int svc_base_is_key, int second_spatial_layer) { + const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0; // Copy source into denoised reference buffers on KEY_FRAME or // if the just encoded frame was resized. 
For SVC, copy source if the base // spatial layer was key frame. @@ -441,8 +458,8 @@ void vp9_denoiser_update_frame_info( int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < denoiser->num_ref_frames; ++i) { - if (denoiser->running_avg_y[i].buffer_alloc != NULL) - copy_frame(&denoiser->running_avg_y[i], &src); + if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL) + copy_frame(&denoiser->running_avg_y[i + shift], &src); } denoiser->reset = 0; return; @@ -451,29 +468,29 @@ void vp9_denoiser_update_frame_info( // If more than one refresh occurs, must copy frame buffer. if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) { if (refresh_alt_ref_frame) { - copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_golden_frame) { - copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_last_frame) { - copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } } else { if (refresh_alt_ref_frame) { - swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_golden_frame) { - swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_last_frame) { - 
swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1], - &denoiser->running_avg_y[INTRA_FRAME]); + swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], + &denoiser->running_avg_y[INTRA_FRAME + shift]); } } } @@ -522,44 +539,90 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm, } int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, - int refresh_alt, int refresh_gld, int refresh_lst, - int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) { + int svc_buf_shift, int refresh_alt, + int refresh_gld, int refresh_lst, int alt_fb_idx, + int gld_fb_idx, int lst_fb_idx) { int fail = 0; if (refresh_alt) { // Increase the frame buffer index by 1 to map it to the buffer index in the // denoiser. - fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1); + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + alt_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } if (refresh_gld) { - fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1); + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + gld_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } if (refresh_lst) { - fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1); + fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, + lst_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } return 0; } -int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser, - int width, int height, int ssx, int ssy, +int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, + int use_svc, int noise_sen, int width, int height, + int ssx, int ssy, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif int border) { - int i, fail, init_num_ref_frames; + int i, layer, fail, init_num_ref_frames; const int legacy_byte_alignment = 0; + int num_layers = 1; + int scaled_width = width; + int scaled_height = height; + if (use_svc) { + LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id * + svc->number_temporal_layers + + 
svc->temporal_layer_id]; + get_layer_resolution(width, height, lc->scaling_factor_num, + lc->scaling_factor_den, &scaled_width, &scaled_height); + // For SVC: only denoise at most 2 spatial (highest) layers. + if (noise_sen >= 2) + // Denoise from one spatial layer below the top. + svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0); + else + // Only denoise the top spatial layer. + svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0); + num_layers = svc->number_spatial_layers - svc->first_layer_denoise; + } assert(denoiser != NULL); - denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES; init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES; + denoiser->num_layers = num_layers; + CHECK_MEM_ERROR(cm, denoiser->running_avg_y, + vpx_calloc(denoiser->num_ref_frames * num_layers, + sizeof(denoiser->running_avg_y[0]))); CHECK_MEM_ERROR( - cm, denoiser->running_avg_y, - vpx_calloc(denoiser->num_ref_frames, sizeof(denoiser->running_avg_y[0]))); - for (i = 0; i < init_num_ref_frames; ++i) { - fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height, - ssx, ssy, + cm, denoiser->mc_running_avg_y, + vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0]))); + + for (layer = 0; layer < num_layers; ++layer) { + const int denoise_width = (layer == 0) ? width : scaled_width; + const int denoise_height = (layer == 0) ? 
height : scaled_height; + for (i = 0; i < init_num_ref_frames; ++i) { + fail = vpx_alloc_frame_buffer( + &denoiser->running_avg_y[i + denoiser->num_ref_frames * layer], + denoise_width, denoise_height, ssx, ssy, +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, +#endif + border, legacy_byte_alignment); + if (fail) { + vp9_denoiser_free(denoiser); + return 1; + } +#ifdef OUTPUT_YUV_DENOISED + make_grayscale(&denoiser->running_avg_y[i]); +#endif + } + + fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer], + denoise_width, denoise_height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif @@ -568,22 +631,10 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser, vp9_denoiser_free(denoiser); return 1; } -#ifdef OUTPUT_YUV_DENOISED - make_grayscale(&denoiser->running_avg_y[i]); -#endif - } - - fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx, - ssy, -#if CONFIG_VP9_HIGHBITDEPTH - use_highbitdepth, -#endif - border, legacy_byte_alignment); - if (fail) { - vp9_denoiser_free(denoiser); - return 1; } + // denoiser->last_source only used for noise_estimation, so only for top + // layer. 
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, @@ -609,12 +660,18 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) { return; } denoiser->frame_buffer_initialized = 0; - for (i = 0; i < denoiser->num_ref_frames; ++i) { + for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) { vpx_free_frame_buffer(&denoiser->running_avg_y[i]); } vpx_free(denoiser->running_avg_y); denoiser->running_avg_y = NULL; - vpx_free_frame_buffer(&denoiser->mc_running_avg_y); + + for (i = 0; i < denoiser->num_layers; ++i) { + vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]); + } + + vpx_free(denoiser->mc_running_avg_y); + denoiser->mc_running_avg_y = NULL; vpx_free_frame_buffer(&denoiser->last_source); } diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index ee0752729..f4da24cbf 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -44,11 +44,12 @@ typedef enum vp9_denoiser_level { typedef struct vp9_denoiser { YV12_BUFFER_CONFIG *running_avg_y; - YV12_BUFFER_CONFIG mc_running_avg_y; + YV12_BUFFER_CONFIG *mc_running_avg_y; YV12_BUFFER_CONFIG last_source; int frame_buffer_initialized; int reset; int num_ref_frames; + int num_layers; VP9_DENOISER_LEVEL denoising_level; VP9_DENOISER_LEVEL prev_denoising_level; } VP9_DENOISER; @@ -66,12 +67,13 @@ typedef struct { } VP9_PICKMODE_CTX_DEN; struct VP9_COMP; +struct SVC; void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, - int svc_base_is_key); + int svc_base_is_key, int second_spatial_layer); void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, @@ -84,11 +86,13 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, 
PICK_MODE_CONTEXT *ctx); int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, - int refresh_alt, int refresh_gld, int refresh_lst, - int alt_fb_idx, int gld_fb_idx, int lst_fb_idx); + int svc_buf_shift, int refresh_alt, + int refresh_gld, int refresh_lst, int alt_fb_idx, + int gld_fb_idx, int lst_fb_idx); -int vp9_denoiser_alloc(VP9_COMMON *cm, int use_svc, VP9_DENOISER *denoiser, - int width, int height, int ssx, int ssy, +int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, + int use_svc, int noise_sen, int width, int height, + int ssx, int ssy, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 087b492ba..e840155b1 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2861,18 +2861,26 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { int svc_base_is_key = 0; + int denoise_svc_second_layer = 0; if (cpi->use_svc) { int realloc_fail = 0; + const int svc_buf_shift = + cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 + ? cpi->denoiser.num_ref_frames + : 0; int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; svc_base_is_key = lc->is_key_frame; - - // Check if we need to allocate extra buffers in the denoiser for + denoise_svc_second_layer = + cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1 + : 0; + // Check if we need to allocate extra buffers in the denoiser + // for // refreshed frames. 
realloc_fail = vp9_denoiser_realloc_svc( - cm, &cpi->denoiser, cpi->refresh_alt_ref_frame, + cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, cpi->lst_fb_idx); if (realloc_fail) @@ -2883,7 +2891,8 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { &cpi->denoiser, *cpi->Source, cpi->common.frame_type, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, - cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key); + cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key, + denoise_svc_second_layer); } #endif if (is_one_pass_cbr_svc(cpi)) { @@ -3318,8 +3327,9 @@ static void setup_denoiser_buffer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (cpi->oxcf.noise_sensitivity > 0 && !cpi->denoiser.frame_buffer_initialized) { - if (vp9_denoiser_alloc(cm, cpi->use_svc, &cpi->denoiser, cm->width, - cm->height, cm->subsampling_x, cm->subsampling_y, + if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, + cpi->oxcf.noise_sensitivity, cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1b3f49833..d723d93cb 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -870,7 +870,7 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { return (!cpi->use_svc || (cpi->use_svc && - cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)); + cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise)); } #endif diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c index 1341e79c4..276a0c785 100644 --- a/vp9/encoder/vp9_noise_estimate.c +++ b/vp9/encoder/vp9_noise_estimate.c @@ -21,6 +21,15 @@ #include 
"vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_encoder.h" +#if CONFIG_VP9_TEMPORAL_DENOISING +// For SVC: only do noise estimation on top spatial layer. +static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) { + return (!cpi->use_svc || + (cpi->use_svc && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)); +} +#endif + void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->enabled = 0; ne->level = kLowLow; @@ -45,7 +54,7 @@ static int enable_noise_estimation(VP9_COMP *const cpi) { #endif // Enable noise estimation if denoising is on. #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && cpi->common.width >= 320 && cpi->common.height >= 180) return 1; #endif @@ -111,7 +120,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Estimate is between current source and last source. YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) { + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) { last_source = &cpi->denoiser.last_source; // Tune these thresholds for different resolutions when denoising is // enabled. 
@@ -131,7 +140,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { (cpi->svc.number_spatial_layers == 1 && (ne->last_w != cm->width || ne->last_h != cm->height))) { #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif if (last_source != NULL) { @@ -146,7 +155,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->count = 0; ne->num_frames_estimate = 10; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && cpi->svc.current_superframe > 1) { vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); copy_frame(&cpi->denoiser.last_source, cpi->Source); @@ -257,14 +266,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->count = 0; ne->level = vp9_noise_estimate_extract_level(ne); #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); #endif } } } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) + if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif } diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index d215cf64a..e0d3bad8b 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -37,6 +37,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->scaled_one_half = 0; svc->current_superframe = 0; svc->non_reference_frame = 0; + for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { svc->ext_frame_flags[sl] = 0; @@ -389,9 +390,9 @@ int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { 
.is_key_frame; } -static void get_layer_resolution(const int width_org, const int height_org, - const int num, const int den, int *width_out, - int *height_out) { +void get_layer_resolution(const int width_org, const int height_org, + const int num, const int den, int *width_out, + int *height_out) { int w, h; if (width_out == NULL || height_out == NULL || den == 0) return; diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 2cd75a3d5..16d1d6bb1 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -49,7 +49,7 @@ typedef struct { uint8_t speed; } LAYER_CONTEXT; -typedef struct { +typedef struct SVC { int spatial_layer_id; int temporal_layer_id; int number_spatial_layers; @@ -99,6 +99,8 @@ typedef struct { BLOCK_SIZE *prev_partition_svc; int mi_stride[VPX_MAX_LAYERS]; + + int first_layer_denoise; } SVC; struct VP9_COMP; @@ -128,6 +130,10 @@ void vp9_save_layer_context(struct VP9_COMP *const cpi); // Initialize second pass rc for spatial svc. void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); +void get_layer_resolution(const int width_org, const int height_org, + const int num, const int den, int *width_out, + int *height_out); + // Increment number of video frames in layer void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi); diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 68969cc50..c21b8b60d 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -408,7 +408,7 @@ enum vp8e_enc_control_id { /*!\brief Codec control function to set noise sensitivity. * - * 0: off, 1: On(YOnly) + * 0: off, 1: On(YOnly), 2: For SVC only, on top two spatial layers(YOnly) * * Supported in codecs: VP9 */