From 7e3a82c3847c9f6f84f2a2f7cf3ea2aab0388053 Mon Sep 17 00:00:00 2001 From: Marco Date: Wed, 21 Dec 2016 14:33:21 -0800 Subject: [PATCH] vp9: Make the denoiser work with spatial SVC. If enabled, the denoiser will only denoise the top spatial layer for now. Added unittest for SVC with denoising. Change-Id: Ifa373771c4ecfa208615eb163cc38f1c22c6664b --- test/datarate_test.cc | 47 ++++++++++++++++++++++++++++++++ vp9/encoder/vp9_denoiser.c | 17 ++++++++---- vp9/encoder/vp9_encoder.c | 2 +- vp9/encoder/vp9_encoder.h | 8 ++++++ vp9/encoder/vp9_noise_estimate.c | 25 ++++++++++------- vp9/encoder/vp9_pickmode.c | 8 ++++-- 6 files changed, 87 insertions(+), 20 deletions(-) diff --git a/test/datarate_test.cc b/test/datarate_test.cc index 98d77285a..e9a8defc4 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -1073,6 +1073,7 @@ class DatarateOnePassCbrSvc duration_ = 0.0; mismatch_psnr_ = 0.0; mismatch_nframes_ = 0; + denoiser_on_ = 0; } virtual void BeginPassHook(unsigned int /*pass*/) {} virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, @@ -1087,6 +1088,8 @@ class DatarateOnePassCbrSvc for (i = 1; i < VPX_SS_MAX_LAYERS; ++i) { svc_params_.speed_per_layer[i] = speed_setting_; } + + encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); encoder->Control(VP9E_SET_SVC, 1); encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); encoder->Control(VP8E_SET_CPUUSED, speed_setting_); @@ -1149,6 +1152,7 @@ class DatarateOnePassCbrSvc int speed_setting_; double mismatch_psnr_; int mismatch_nframes_; + int denoiser_on_; }; static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg, const vpx_svc_extra_cfg_t *svc_params, @@ -1222,6 +1226,49 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers) { } } +// Check basic rate targeting for 1 pass CBR SVC with denoising. +// 2 spatial layers and 3 temporal layers. Run CIF clip with 1 thread. 
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayersDenoiserOn) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 2; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.g_threads = 1; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 144; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 288; + svc_params_.scaling_factor_den[1] = 288; + cfg_.rc_dropframe_thresh = 10; + cfg_.kf_max_dist = 9999; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 200); + // TODO(marpan): Check that effective_datarate for each layer hits the + // layer target_bitrate. + for (int i = 200; i <= 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + denoiser_on_ = 1; + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) + << " The datarate for the file is lower than the target by too much!"; + EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames()); + } +} + // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3 // temporal layers. Run CIF clip with 1 thread, and few short key frame periods. 
TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayersSmallKf) { diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 1d9a6702d..336abb406 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -187,7 +187,8 @@ static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, static VP9_DENOISER_DECISION perform_motion_compensation( VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, - int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv) { + int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, + int num_spatial_layers) { int sse_diff = ctx->zeromv_sse - ctx->newmv_sse; MV_REFERENCE_FRAME frame; MACROBLOCKD *filter_mbd = &mb->e_mbd; @@ -211,7 +212,9 @@ static VP9_DENOISER_DECISION perform_motion_compensation( // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. - if (frame != INTRA_FRAME && ctx->newmv_sse != UINT_MAX && + if (frame != INTRA_FRAME && + (frame != GOLDEN_FRAME || num_spatial_layers == 1) && + ctx->newmv_sse != UINT_MAX && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; @@ -221,9 +224,10 @@ static VP9_DENOISER_DECISION perform_motion_compensation( frame = ctx->best_zeromv_reference_frame; ctx->newmv_sse = ctx->zeromv_sse; // Bias to last reference. 
- if (frame != LAST_FRAME && - ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || - denoiser->denoising_level >= kDenHigh)) { + if (num_spatial_layers > 1 || + (frame != LAST_FRAME && + ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || + denoiser->denoising_level >= kDenHigh))) { frame = LAST_FRAME; ctx->newmv_sse = ctx->zeromv_lastref_sse; } @@ -361,7 +365,8 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, if (denoiser->denoising_level >= kDenLow) decision = perform_motion_compensation( denoiser, mb, bs, denoiser->increase_denoising, mi_row, mi_col, ctx, - motion_magnitude, is_skin, &zeromv_filter, consec_zeromv); + motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, + cpi->svc.number_spatial_layers); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter( diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 37cf8257a..3fa7dbbf6 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -2568,7 +2568,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { sizeof(cpi->interp_filter_selected[0])); } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { vp9_denoiser_update_frame_info( &cpi->denoiser, *cpi->Source, cpi->common.frame_type, diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index de324d3aa..c415414f3 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -765,6 +765,14 @@ static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { return (cpi->use_svc && cpi->oxcf.pass == 0); } +#if CONFIG_VP9_TEMPORAL_DENOISING +static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { + return (!cpi->use_svc || + (cpi->use_svc && + cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)); +} +#endif + static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { return 
!(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) && cpi->oxcf.lag_in_frames > 0 && diff --git a/vp9/encoder/vp9_noise_estimate.c b/vp9/encoder/vp9_noise_estimate.c index 2252fe16b..3dc321bfc 100644 --- a/vp9/encoder/vp9_noise_estimate.c +++ b/vp9/encoder/vp9_noise_estimate.c @@ -40,8 +40,8 @@ void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { static int enable_noise_estimation(VP9_COMP *const cpi) { // Enable noise estimation if denoising is on, but not for low resolutions. #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && cpi->common.width >= 640 && - cpi->common.height >= 360) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && + cpi->common.width >= 640 && cpi->common.height >= 360) return 1; #endif // Only allow noise estimate under certain encoding mode. @@ -101,17 +101,22 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { unsigned int thresh_sum_spatial = (200 * 200) << 8; unsigned int thresh_spatial_var = (32 * 32) << 8; int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; + int frame_counter = cm->current_video_frame; // Estimate is between current source and last source. 
YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) last_source = &cpi->denoiser.last_source; + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) + last_source = &cpi->denoiser.last_source; #endif ne->enabled = enable_noise_estimation(cpi); - if (!ne->enabled || cm->current_video_frame % frame_period != 0 || - last_source == NULL || ne->last_w != cm->width || - ne->last_h != cm->height) { + if (cpi->svc.number_spatial_layers > 1) + frame_counter = cpi->svc.current_superframe; + if (!ne->enabled || frame_counter % frame_period != 0 || + last_source == NULL || + (cpi->svc.number_spatial_layers == 1 && + (ne->last_w != cm->width || ne->last_h != cm->height))) { #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif if (last_source != NULL) { @@ -123,7 +128,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { // Force noise estimation to 0 and denoiser off if content has high motion. 
ne->level = kLowLow; #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); #endif return; @@ -232,14 +237,14 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) { ne->count = 0; ne->level = vp9_noise_estimate_extract_level(ne); #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level); #endif } } } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0) + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif } diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index 33f3f5a47..041d2a59d 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -1239,6 +1239,7 @@ static void recheck_zeromv_after_denoising( ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) && ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) || (ctx_den->best_ref_frame == GOLDEN_FRAME && + cpi->svc.number_spatial_layers == 1 && decision == FILTER_ZEROMV_BLOCK))) { // Check if we should pick ZEROMV on denoised signal. 
int rate = 0; @@ -1459,7 +1460,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { vp9_denoiser_reset_frame_stats(ctx); } @@ -1885,7 +1886,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, } #if CONFIG_VP9_TEMPORAL_DENOISING - if (cpi->oxcf.noise_sensitivity > 0 && + if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx); // Keep track of zero_last cost. @@ -2078,7 +2079,8 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 && - cpi->denoiser.denoising_level > kDenLowLow && cpi->denoiser.reset == 0) { + denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow && + cpi->denoiser.reset == 0) { VP9_DENOISER_DECISION decision = COPY_BLOCK; vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, frame_mv, reuse_inter_pred, best_tx_size, -- 2.40.0