From d162934bdca89b156194732226746993356344ef Mon Sep 17 00:00:00 2001 From: Marco Date: Fri, 16 Oct 2015 16:14:22 -0700 Subject: [PATCH] VP9: Estimate noise level for denoiser. Periodically estimate noise level in source, and only denoise if estimated noise level is above threshold. Change-Id: I54f967b3003b0c14d0b1d3dc83cb82ce8cc2d381 --- vp9/encoder/vp9_denoiser.c | 159 +++++++++++++++++++++++++++++++++++-- vp9/encoder/vp9_denoiser.h | 13 +++ vp9/encoder/vp9_encoder.c | 7 ++ 3 files changed, 173 insertions(+), 6 deletions(-) diff --git a/vp9/encoder/vp9_denoiser.c b/vp9/encoder/vp9_denoiser.c index 678e3123f..74189667b 100644 --- a/vp9/encoder/vp9_denoiser.c +++ b/vp9/encoder/vp9_denoiser.c @@ -10,6 +10,8 @@ #include #include +#include + #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_scale/yv12config.h" @@ -17,6 +19,7 @@ #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_denoiser.h" +#include "vp9/encoder/vp9_encoder.h" /* The VP9 denoiser is a work-in-progress. It currently is only designed to work * with speed 6, though it (inexplicably) seems to also work with speed 5 (one @@ -325,7 +328,7 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, struct buf_2d src = mb->plane[0].src; int is_skin = 0; - if (bs <= BLOCK_16X16) { + if (bs <= BLOCK_16X16 && !denoiser->no_denoising) { // Take center pixel in block to determine is_skin. 
const int y_width_shift = (4 << b_width_log2_lookup[bs]) >> 1; const int y_height_shift = (4 << b_height_log2_lookup[bs]) >> 1; @@ -342,11 +345,12 @@ void vp9_denoiser_denoise(VP9_DENOISER *denoiser, MACROBLOCK *mb, is_skin = vp9_skin_pixel(ysource, usource, vsource); } - decision = perform_motion_compensation(denoiser, mb, bs, - denoiser->increase_denoising, - mi_row, mi_col, ctx, - &motion_magnitude, - is_skin); + if (!denoiser->no_denoising) + decision = perform_motion_compensation(denoiser, mb, bs, + denoiser->increase_denoising, + mi_row, mi_col, ctx, + &motion_magnitude, + is_skin); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, @@ -493,6 +497,17 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, +#endif + border, legacy_byte_alignment); + if (fail) { + vp9_denoiser_free(denoiser); + return 1; + } + + fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, + ssx, ssy, +#if CONFIG_VP9_HIGHBITDEPTH + use_highbitdepth, #endif border, legacy_byte_alignment); if (fail) { @@ -504,9 +519,27 @@ int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, #endif denoiser->increase_denoising = 0; denoiser->frame_buffer_initialized = 1; + vp9_denoiser_init_noise_estimate(denoiser, width, height); return 0; } +void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, + int width, + int height) { + // Denoiser is off by default, i.e., no denoising is performed. + // Noise level is measured periodically, and if observed to be above + // thresh_noise_estimate, then denoising is performed, i.e., no_denoising = 0. 
+ denoiser->no_denoising = 1; + denoiser->noise_estimate = 0; + denoiser->noise_estimate_count = 0; + denoiser->thresh_noise_estimate = 20; + if (width * height >= 1920 * 1080) { + denoiser->thresh_noise_estimate = 70; + } else if (width * height >= 1280 * 720) { + denoiser->thresh_noise_estimate = 40; + } +} + void vp9_denoiser_free(VP9_DENOISER *denoiser) { int i; denoiser->frame_buffer_initialized = 0; @@ -517,6 +550,120 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) { vpx_free_frame_buffer(&denoiser->running_avg_y[i]); } vpx_free_frame_buffer(&denoiser->mc_running_avg_y); + vpx_free_frame_buffer(&denoiser->last_source); +} + +void vp9_denoiser_update_noise_estimate(VP9_COMP *const cpi) { + const VP9_COMMON *const cm = &cpi->common; + CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; + int frame_period = 10; + int thresh_consec_zeromv = 8; + unsigned int thresh_sum_diff = 128; + int num_frames_estimate = 20; + int min_blocks_estimate = cm->mi_rows * cm->mi_cols >> 7; + // Estimate of noise level every frame_period frames. + // Estimate is between current source and last source. + if (cm->current_video_frame % frame_period != 0 || + cpi->denoiser.last_source.y_buffer == NULL) { + copy_frame(&cpi->denoiser.last_source, cpi->Source); + return; + } else { + int num_samples = 0; + uint64_t avg_est = 0; + int bsize = BLOCK_16X16; + static const unsigned char const_source[16] = { + 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128}; + // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have + // been encoded as zero/small mv at least x consecutive frames, compute + // the variance to update estimate of noise in the source. 
+ const uint8_t *src_y = cpi->Source->y_buffer; + const int src_ystride = cpi->Source->y_stride; + const uint8_t *last_src_y = cpi->denoiser.last_source.y_buffer; + const int last_src_ystride = cpi->denoiser.last_source.y_stride; + const uint8_t *src_u = cpi->Source->u_buffer; + const uint8_t *src_v = cpi->Source->v_buffer; + const int src_uvstride = cpi->Source->uv_stride; + const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1; + const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1; + const int uv_width_shift = y_width_shift >> 1; + const int uv_height_shift = y_height_shift >> 1; + int mi_row, mi_col; + for (mi_row = 0; mi_row < cm->mi_rows; mi_row ++) { + for (mi_col = 0; mi_col < cm->mi_cols; mi_col ++) { + // 16x16 blocks, 1/4 sample of frame. + if (mi_row % 4 == 0 && mi_col % 4 == 0) { + int bl_index = mi_row * cm->mi_cols + mi_col; + int bl_index1 = bl_index + 1; + int bl_index2 = bl_index + cm->mi_cols; + int bl_index3 = bl_index2 + 1; + // Only consider blocks that are likely steady background. i.e, have + // been encoded as zero/low motion x (= thresh_consec_zeromv) frames + // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all + // 4 sub-blocks for 16x16 block. Also, avoid skin blocks. + const uint8_t ysource = + src_y[y_height_shift * src_ystride + y_width_shift]; + const uint8_t usource = + src_u[uv_height_shift * src_uvstride + uv_width_shift]; + const uint8_t vsource = + src_v[uv_height_shift * src_uvstride + uv_width_shift]; + int is_skin = vp9_skin_pixel(ysource, usource, vsource); + if (cr->consec_zero_mv[bl_index] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index1] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index2] > thresh_consec_zeromv && + cr->consec_zero_mv[bl_index3] > thresh_consec_zeromv && + !is_skin) { + // Compute variance. 
+ unsigned int sse; + unsigned int variance = cpi->fn_ptr[bsize].vf(src_y, + src_ystride, + last_src_y, + last_src_ystride, + &sse); + // Only consider this block as valid for noise measurement if the + // average term (sse - variance = N * avg^{2}, N = 16X16) of the + // temporal residual is small (avoid effects from lighting change). + if ((sse - variance) < thresh_sum_diff) { + unsigned int sse2; + const unsigned int spatial_variance = + cpi->fn_ptr[bsize].vf(src_y, src_ystride, const_source, + 0, &sse2); + avg_est += variance / (10 + spatial_variance); + num_samples++; + } + } + } + src_y += 8; + last_src_y += 8; + src_u += 4; + src_v += 4; + } + src_y += (src_ystride << 3) - (cm->mi_cols << 3); + last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3); + src_u += (src_uvstride << 2) - (cm->mi_cols << 2); + src_v += (src_uvstride << 2) - (cm->mi_cols << 2); + } + // Update noise estimate if we have at a minimum number of block samples, + // and avg_est > 0 (avg_est == 0 can happen if the application inputs + // duplicate frames). + if (num_samples > min_blocks_estimate && avg_est > 0) { + // Normalize. + avg_est = (avg_est << 8) / num_samples; + // Update noise estimate. + cpi->denoiser.noise_estimate = (3 * cpi->denoiser.noise_estimate + + avg_est) >> 2; + cpi->denoiser.noise_estimate_count++; + if (cpi->denoiser.noise_estimate_count == num_frames_estimate) { + // Reset counter and check noise level condition. 
+ cpi->denoiser.noise_estimate_count = 0; + if (cpi->denoiser.noise_estimate > cpi->denoiser.thresh_noise_estimate) + cpi->denoiser.no_denoising = 0; + else + cpi->denoiser.no_denoising = 1; + } + } + } + copy_frame(&cpi->denoiser.last_source, cpi->Source); } #ifdef OUTPUT_YUV_DENOISED diff --git a/vp9/encoder/vp9_denoiser.h b/vp9/encoder/vp9_denoiser.h index ec0b25e01..ad1687e7d 100644 --- a/vp9/encoder/vp9_denoiser.h +++ b/vp9/encoder/vp9_denoiser.h @@ -29,10 +29,17 @@ typedef enum vp9_denoiser_decision { typedef struct vp9_denoiser { YV12_BUFFER_CONFIG running_avg_y[MAX_REF_FRAMES]; YV12_BUFFER_CONFIG mc_running_avg_y; + YV12_BUFFER_CONFIG last_source; int increase_denoising; int frame_buffer_initialized; + int no_denoising; + int noise_estimate; + int thresh_noise_estimate; + int noise_estimate_count; } VP9_DENOISER; +struct VP9_COMP; + void vp9_denoiser_update_frame_info(VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type, @@ -69,6 +76,12 @@ static int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { void vp9_denoiser_free(VP9_DENOISER *denoiser); +void vp9_denoiser_init_noise_estimate(VP9_DENOISER *denoiser, + int width, + int height); + +void vp9_denoiser_update_noise_estimate(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 5b75d672f..72eafec40 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3250,6 +3250,13 @@ static void encode_without_recode_loop(VP9_COMP *cpi, &cpi->scaled_last_source, (cpi->oxcf.pass == 0)); +#if CONFIG_VP9_TEMPORAL_DENOISING + if (cpi->oxcf.noise_sensitivity > 0 && + cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { + vp9_denoiser_update_noise_estimate(cpi); + } +#endif + if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->resize_state == 0 && -- 2.40.0