/*
 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
15 #include "./vpx_dsp_rtcd.h"
16 #include "vpx_dsp/vpx_dsp_common.h"
17 #include "vpx_scale/yv12config.h"
18 #include "vpx/vpx_integer.h"
19 #include "vp9/common/vp9_reconinter.h"
20 #include "vp9/encoder/vp9_context_tree.h"
21 #include "vp9/encoder/vp9_denoiser.h"
22 #include "vp9/encoder/vp9_encoder.h"
24 // OUTPUT_YUV_DENOISED
26 #ifdef OUTPUT_YUV_DENOISED
27 static void make_grayscale(YV12_BUFFER_CONFIG *yuv);
30 static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) {
32 return 3 + (increase_denoising ? 1 : 0);
35 static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) {
37 (void)increase_denoising;
41 static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) {
43 (void)increase_denoising;
47 static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) {
48 return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40);
51 static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising,
52 int motion_magnitude) {
53 if (motion_magnitude > noise_motion_thresh(bs, increase_denoising)) {
54 if (increase_denoising)
55 return (1 << num_pels_log2_lookup[bs]) << 2;
59 return (1 << num_pels_log2_lookup[bs]) << 4;
63 static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) {
64 return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2);
// TODO(jackychen): If increase_denoising is enabled in the future,
// we might need to update the code for calculating 'total_adj' in
// case the C code is not bit-exact with corresponding sse2 code.
//
// Temporal denoising filter (C reference): blends the source block |sig|
// toward the motion-compensated running average |mc_avg|, writing the result
// to |avg|. A strong first pass is applied; if it changed the signal too
// much, a second pass partially undoes the adjustment.
// NOTE(review): this extracted chunk is missing several interior lines
// (declarations of r/c/shift_inc/total_adj, the switch header, closing
// braces, and the final copy/return paths) — comments below cover only the
// visible statements; return value/decision path must be confirmed upstream.
int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride,
                          const uint8_t *mc_avg, int mc_avg_stride,
                          uint8_t *avg, int avg_stride, int increase_denoising,
                          BLOCK_SIZE bs, int motion_magnitude) {
  // Remember the block origins so the buffers can be rewound for pass two.
  const uint8_t *sig_start = sig;
  const uint8_t *mc_avg_start = mc_avg;
  uint8_t *avg_start = avg;
  int diff, adj, absdiff, delta;
  // Per-pixel adjustment steps, selected by the magnitude of |mc_avg - sig|.
  int adj_val[] = { 3, 4, 6 };
  // If motion_magnitude is small, making the denoiser more aggressive by
  // increasing the adjustment for each level. Add another increment for
  // blocks that are labeled for increase denoising.
  if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
    if (increase_denoising) {
      adj_val[0] += shift_inc;
      adj_val[1] += shift_inc;
      adj_val[2] += shift_inc;
  // First attempt to apply a strong temporal denoising filter.
  // Block dimensions in pixels are (4 << b_{height,width}_log2_lookup[bs]).
  for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
    for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
      diff = mc_avg[c] - sig[c];
      if (absdiff <= absdiff_thresh(bs, increase_denoising)) {
        // Bucket the absolute difference to choose the adjustment step.
        case 7: adj = adj_val[0]; break;
        case 15: adj = adj_val[1]; break;
        default: adj = adj_val[2];
        // Move the output toward the motion-compensated average, saturating
        // at the 8-bit pixel range.
        avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj);
        avg[c] = VPXMAX(0, sig[c] - adj);
    // Advance to the next row of the motion-compensated average.
    mc_avg += mc_avg_stride;
  // If the strong filter did not modify the signal too much, we're all set.
  if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) {
  // Otherwise, we try to dampen the filter if the delta is not too high.
  delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising)) >>
           num_pels_log2_lookup[bs]) +
  if (delta >= delta_thresh(bs, increase_denoising)) {
    // Rewind to the start of the block for the dampening pass.
    mc_avg = mc_avg_start;
    for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) {
      for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) {
        diff = mc_avg[c] - sig[c];
        // Diff positive means we made positive adjustment above
        // (in first try/attempt), so now make negative adjustment to bring
        // denoised signal down.
        avg[c] = VPXMAX(0, avg[c] - adj);
        // Diff negative means we made negative adjustment above
        // (in first try/attempt), so now make positive adjustment to bring
        // denoised signal up.
        avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj);
      mc_avg += mc_avg_stride;
  // We can use the filter if it has been sufficiently dampened
  if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) {
// Returns a pointer to the top-left pixel of the block at mode-info position
// (mi_row, mi_col) within |framebuf|. Each mi unit spans 8 pixels in both
// dimensions, hence the << 3 on both the row and column offsets.
static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row,
                            int mi_col) {
  return framebuf + (stride * mi_row << 3) + (mi_col << 3);
}
// Decides whether this block should be denoised and, when the best candidate
// is an inter mode with a sufficient SSE advantage, runs motion compensation
// from the denoiser's per-reference running average into mc_running_avg_y.
// The MACROBLOCKD buffer pointers are redirected for the prediction call and
// restored afterwards. Returns a VP9_DENOISER_DECISION.
// NOTE(review): this extracted chunk is missing interior lines (the early
// returns after the "Restore" comments, the 'i' declaration, several closing
// braces, *zeromv_filter updates, and the final return) — comments cover
// only the visible statements.
static VP9_DENOISER_DECISION perform_motion_compensation(
    VP9_COMMON *const cm, VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs,
    int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
    int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
    int num_spatial_layers, int width) {
  // SSE advantage of the best non-zero-mv candidate over the zero-mv one.
  int sse_diff = ctx->zeromv_sse - ctx->newmv_sse;
  MV_REFERENCE_FRAME frame;
  MACROBLOCKD *filter_mbd = &mb->e_mbd;
  MODE_INFO *mi = filter_mbd->mi[0];
  // State saved around the motion-compensation call, restored at the end.
  struct buf_2d saved_dst[MAX_MB_PLANE];
  struct buf_2d saved_pre[MAX_MB_PLANE];
  RefBuffer *saved_block_refs[2];
  frame = ctx->best_reference_frame;
  // Skin blocks with any motion (or a short zero-motion run) are not
  // denoised at all.
  if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4)) return COPY_BLOCK;
  // Avoid denoising small blocks. When noise > kDenLow or frame width > 480,
  // denoise 16x16 blocks.
  if (bs == BLOCK_8X8 || bs == BLOCK_8X16 || bs == BLOCK_16X8 ||
      (bs == BLOCK_16X16 && width > 480 &&
       denoiser->denoising_level <= kDenLow))
  // If the best reference frame uses inter-prediction and there is enough of a
  // difference in sum-squared-error, use it.
  if (frame != INTRA_FRAME &&
      (frame != GOLDEN_FRAME || num_spatial_layers == 1) &&
      ctx->newmv_sse != UINT_MAX &&
      sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
    // Adopt the best inter candidate for motion compensation.
    mi->ref_frame[0] = ctx->best_reference_frame;
    mi->mode = ctx->best_sse_inter_mode;
    mi->mv[0] = ctx->best_sse_mv;
  // Otherwise, use the zero reference frame.
  frame = ctx->best_zeromv_reference_frame;
  ctx->newmv_sse = ctx->zeromv_sse;
  // Bias to last reference.
  // NOTE(review): the mangled expression below reads as
  //   ctx->zeromv_lastref_sse < ((5 * ctx->zeromv_sse) >> 2)
  // after clang-format spacing loss — confirm against upstream.
  if (num_spatial_layers > 1 ||
      (frame != LAST_FRAME &&
       ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) ||
        denoiser->denoising_level >= kDenHigh))) {
    ctx->newmv_sse = ctx->zeromv_lastref_sse;
  // Zero-mv path: force a ZEROMV candidate on |frame|.
  mi->ref_frame[0] = frame;
  mi->mv[0].as_int = 0;
  ctx->best_sse_inter_mode = ZEROMV;
  ctx->best_sse_mv.as_int = 0;
  // At high denoising levels, treat the block as motionless for filtering.
  if (denoiser->denoising_level > kDenMedium) {
    motion_magnitude = 0;
  // Residual too large even for the chosen candidate: give up on this block.
  if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
    // Restore everything to its original state
  // Motion far beyond the noise threshold: give up on this block.
  if (motion_magnitude > (noise_motion_thresh(bs, increase_denoising) << 3)) {
    // Restore everything to its original state
  // We will restore these after motion compensation.
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    saved_pre[i] = filter_mbd->plane[i].pre[0];
    saved_dst[i] = filter_mbd->plane[i].dst;
  saved_block_refs[0] = filter_mbd->block_refs[0];
  // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser
  // Prediction source: per-reference running average (Y, U, V planes).
  filter_mbd->plane[0].pre[0].buf =
      block_start(denoiser->running_avg_y[frame].y_buffer,
                  denoiser->running_avg_y[frame].y_stride, mi_row, mi_col);
  filter_mbd->plane[0].pre[0].stride = denoiser->running_avg_y[frame].y_stride;
  filter_mbd->plane[1].pre[0].buf =
      block_start(denoiser->running_avg_y[frame].u_buffer,
                  denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
  filter_mbd->plane[1].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
  filter_mbd->plane[2].pre[0].buf =
      block_start(denoiser->running_avg_y[frame].v_buffer,
                  denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col);
  filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride;
  // Prediction destination: the motion-compensated running average.
  filter_mbd->plane[0].dst.buf =
      block_start(denoiser->mc_running_avg_y.y_buffer,
                  denoiser->mc_running_avg_y.y_stride, mi_row, mi_col);
  filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y.y_stride;
  filter_mbd->plane[1].dst.buf =
      block_start(denoiser->mc_running_avg_y.u_buffer,
                  denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
  filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y.uv_stride;
  filter_mbd->plane[2].dst.buf =
      block_start(denoiser->mc_running_avg_y.v_buffer,
                  denoiser->mc_running_avg_y.uv_stride, mi_row, mi_col);
  filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y.uv_stride;
  // Run the actual motion compensation into the redirected buffers.
  set_ref_ptrs(cm, filter_mbd, frame, NONE);
  vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs);
  // Restore everything to its original state
  filter_mbd->block_refs[0] = saved_block_refs[0];
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    filter_mbd->plane[i].pre[0] = saved_pre[i];
    filter_mbd->plane[i].dst = saved_dst[i];
// Per-block denoiser entry point: derives the motion magnitude from the best
// mode-search MV, runs skin detection, asks perform_motion_compensation() for
// a decision, applies vp9_denoiser_filter(), and finally copies either the
// denoised block into the source (FILTER_BLOCK) or the source into the
// running average (COPY_BLOCK). The decision is reported via
// *denoiser_decision.
// NOTE(review): this extracted chunk is missing interior lines (declarations
// of mv_row/mv_col/is_skin/i/j and several closing braces) — comments cover
// only the visible statements.
void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
                          BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
                          VP9_DENOISER_DECISION *denoiser_decision) {
  int motion_magnitude = 0;
  int zeromv_filter = 0;
  VP9_DENOISER *denoiser = &cpi->denoiser;
  VP9_DENOISER_DECISION decision = COPY_BLOCK;
  // running_avg_y[INTRA_FRAME] receives this frame's denoised output.
  YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME];
  YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y;
  uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col);
  uint8_t *mc_avg_start =
      block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col);
  struct buf_2d src = mb->plane[0].src;
  int increase_denoising = 0;
  int consec_zeromv = 0;
  // Squared MV length is used as the motion magnitude throughout.
  mv_col = ctx->best_sse_mv.as_mv.col;
  mv_row = ctx->best_sse_mv.as_mv.row;
  motion_magnitude = mv_row * mv_row + mv_col * mv_col;
  if (cpi->use_skin_detection && bs <= BLOCK_32X32 &&
      denoiser->denoising_level < kDenHigh) {
    int motion_level = (motion_magnitude < 16) ? 0 : 1;
    // If motion for current block is small/zero, compute consec_zeromv for
    // skin detection (early exit in skin detection is done for large
    // consec_zeromv when current block has small/zero motion).
    if (motion_level == 0) {
      VP9_COMMON *const cm = &cpi->common;
      // Loop through the 8x8 sub-blocks.
      const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
      const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
      // Clamp the scan to the frame boundary.
      const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
      const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
      const int block_index = mi_row * cm->mi_cols + mi_col;
      for (i = 0; i < ymis; i++) {
        for (j = 0; j < xmis; j++) {
          int bl_index = block_index + i * cm->mi_cols + j;
          consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv);
          // No need to keep checking 8x8 blocks if any of the sub-blocks
          // has small consec_zeromv (since threshold for no_skin based on
          // zero/small motion in skin detection is high, i.e, > 4).
          if (consec_zeromv < 4) {
    // TODO(marpan): Compute skin detection over sub-blocks.
    is_skin = vp9_compute_skin_block(
        mb->plane[0].src.buf, mb->plane[1].src.buf, mb->plane[2].src.buf,
        mb->plane[0].src.stride, mb->plane[1].src.stride, bs, consec_zeromv,
  // Non-skin blocks get stronger denoising at the highest noise level.
  if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1;
  // TODO(marpan): There is an issue with denoising for speed 5,
  // due to the partitioning scheme based on pickmode.
  // Remove this speed constraint when issue is resolved.
  if (denoiser->denoising_level >= kDenLow && cpi->oxcf.speed > 5)
    decision = perform_motion_compensation(
        &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
        motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
        cpi->svc.number_spatial_layers, cpi->Source->y_width);
  if (decision == FILTER_BLOCK) {
    decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
                                   mc_avg.y_stride, avg_start, avg.y_stride,
                                   increase_denoising, bs, motion_magnitude);
  if (decision == FILTER_BLOCK) {
    // Write the denoised block back over the source pixels.
    vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0,
                      NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
                      num_4x4_blocks_high_lookup[bs] << 2);
  } else {  // COPY_BLOCK
    // Keep the source pixels; refresh the running average from the source.
    vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0,
                      NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
                      num_4x4_blocks_high_lookup[bs] << 2);
  *denoiser_decision = decision;
  // A filtered block that used the forced zero-mv path is reported
  // distinctly so the caller can account for it.
  if (decision == FILTER_BLOCK && zeromv_filter == 1)
    *denoiser_decision = FILTER_ZEROMV_BLOCK;
397 static void copy_frame(YV12_BUFFER_CONFIG *const dest,
398 const YV12_BUFFER_CONFIG *const src) {
400 const uint8_t *srcbuf = src->y_buffer;
401 uint8_t *destbuf = dest->y_buffer;
403 assert(dest->y_width == src->y_width);
404 assert(dest->y_height == src->y_height);
406 for (r = 0; r < dest->y_height; ++r) {
407 memcpy(destbuf, srcbuf, dest->y_width);
408 destbuf += dest->y_stride;
409 srcbuf += src->y_stride;
413 static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest,
414 YV12_BUFFER_CONFIG *const src) {
415 uint8_t *tmp_buf = dest->y_buffer;
416 assert(dest->y_width == src->y_width);
417 assert(dest->y_height == src->y_height);
418 dest->y_buffer = src->y_buffer;
419 src->y_buffer = tmp_buf;
// Updates the denoiser's per-reference running-average buffers after a frame
// is encoded, mirroring which reference slots the encoder refreshed. When
// multiple references are refreshed the INTRA_FRAME buffer must be deep-
// copied into each; a single refresh can use a cheap pointer swap instead.
// NOTE(review): this extracted chunk is missing interior lines (the 'i'
// declaration, the tail of the key-frame condition — presumably the SVC
// base-is-key test — a 'return', the 'else' between the copy and swap
// groups, and closing braces); comments cover only the visible statements.
void vp9_denoiser_update_frame_info(
    VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
    int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
    int resized, int svc_base_is_key) {
  // Copy source into denoised reference buffers on KEY_FRAME or
  // if the just encoded frame was resized. For SVC, copy source if the base
  // spatial layer was key frame.
  if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset ||
    // Start at 1 so as not to overwrite the INTRA_FRAME
    for (i = 1; i < MAX_REF_FRAMES; ++i)
      copy_frame(&denoiser->running_avg_y[i], &src);
  // If more than one refresh occurs, must copy frame buffer.
  if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
    if (refresh_alt_ref_frame) {
      copy_frame(&denoiser->running_avg_y[ALTREF_FRAME],
                 &denoiser->running_avg_y[INTRA_FRAME]);
    if (refresh_golden_frame) {
      copy_frame(&denoiser->running_avg_y[GOLDEN_FRAME],
                 &denoiser->running_avg_y[INTRA_FRAME]);
    if (refresh_last_frame) {
      copy_frame(&denoiser->running_avg_y[LAST_FRAME],
                 &denoiser->running_avg_y[INTRA_FRAME]);
    // Single-refresh path: swap buffer pointers instead of deep-copying.
    if (refresh_alt_ref_frame) {
      swap_frame_buffer(&denoiser->running_avg_y[ALTREF_FRAME],
                        &denoiser->running_avg_y[INTRA_FRAME]);
    if (refresh_golden_frame) {
      swap_frame_buffer(&denoiser->running_avg_y[GOLDEN_FRAME],
                        &denoiser->running_avg_y[INTRA_FRAME]);
    if (refresh_last_frame) {
      swap_frame_buffer(&denoiser->running_avg_y[LAST_FRAME],
                        &denoiser->running_avg_y[INTRA_FRAME]);
469 void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) {
470 ctx->zeromv_sse = UINT_MAX;
471 ctx->newmv_sse = UINT_MAX;
472 ctx->zeromv_lastref_sse = UINT_MAX;
473 ctx->best_sse_mv.as_int = 0;
476 void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
477 PREDICTION_MODE mode,
478 PICK_MODE_CONTEXT *ctx) {
479 if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) {
480 ctx->zeromv_sse = sse;
481 ctx->best_zeromv_reference_frame = mi->ref_frame[0];
482 if (mi->ref_frame[0] == LAST_FRAME) ctx->zeromv_lastref_sse = sse;
485 if (mi->mv[0].as_int != 0 && sse < ctx->newmv_sse) {
486 ctx->newmv_sse = sse;
487 ctx->best_sse_inter_mode = mode;
488 ctx->best_sse_mv = mi->mv[0];
489 ctx->best_reference_frame = mi->ref_frame[0];
// Allocates the denoiser's frame buffers: one running average per reference
// slot, the motion-compensated running average, and a copy of the last
// source frame. Any allocation failure frees everything already allocated.
// NOTE(review): this extracted chunk is missing interior lines (remaining
// parameters ssy/border, 'fail'/'i' declarations, the highbitdepth argument
// lists, the failure checks and 'return 1'/'return 0', closing braces);
// comments cover only the visible statements.
int vp9_denoiser_alloc(VP9_DENOISER *denoiser, int width, int height, int ssx,
#if CONFIG_VP9_HIGHBITDEPTH
                       int use_highbitdepth,
  // Denoiser buffers use legacy (non-extended) byte alignment.
  const int legacy_byte_alignment = 0;
  assert(denoiser != NULL);
  // One running-average buffer per reference frame slot.
  for (i = 0; i < MAX_REF_FRAMES; ++i) {
    fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[i], width, height,
#if CONFIG_VP9_HIGHBITDEPTH
                                  border, legacy_byte_alignment);
      // On failure, release everything allocated so far.
      vp9_denoiser_free(denoiser);
#ifdef OUTPUT_YUV_DENOISED
    make_grayscale(&denoiser->running_avg_y[i]);
  // Motion-compensated running average buffer.
  fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y, width, height, ssx,
#if CONFIG_VP9_HIGHBITDEPTH
                                border, legacy_byte_alignment);
    vp9_denoiser_free(denoiser);
  // Buffer holding a copy of the previous source frame.
  fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy,
#if CONFIG_VP9_HIGHBITDEPTH
                                border, legacy_byte_alignment);
    vp9_denoiser_free(denoiser);
#ifdef OUTPUT_YUV_DENOISED
  make_grayscale(&denoiser->running_avg_y[i]);
  denoiser->frame_buffer_initialized = 1;
  // Default to the lowest active denoising level until the caller sets one.
  denoiser->denoising_level = kDenLow;
  denoiser->prev_denoising_level = kDenLow;
549 void vp9_denoiser_free(VP9_DENOISER *denoiser) {
551 if (denoiser == NULL) {
554 denoiser->frame_buffer_initialized = 0;
555 for (i = 0; i < MAX_REF_FRAMES; ++i) {
556 vpx_free_frame_buffer(&denoiser->running_avg_y[i]);
558 vpx_free_frame_buffer(&denoiser->mc_running_avg_y);
559 vpx_free_frame_buffer(&denoiser->last_source);
562 void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) {
563 denoiser->denoising_level = noise_level;
564 if (denoiser->denoising_level > kDenLowLow &&
565 denoiser->prev_denoising_level == kDenLowLow)
569 denoiser->prev_denoising_level = denoiser->denoising_level;
572 // Scale/increase the partition threshold for denoiser speed-up.
573 int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level,
575 if ((content_state == kLowSadLowSumdiff) ||
576 (content_state == kHighSadLowSumdiff) || noise_level == kDenHigh)
577 return (3 * threshold) >> 1;
579 return (5 * threshold) >> 2;
582 // Scale/increase the ac skip threshold for denoiser speed-up.
583 int64_t vp9_scale_acskip_thresh(int64_t threshold,
584 VP9_DENOISER_LEVEL noise_level,
586 if (noise_level >= kDenLow && abs_sumdiff < 5)
587 return threshold *= (noise_level == kDenLow) ? 2 : 6;
592 #ifdef OUTPUT_YUV_DENOISED
593 static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
595 uint8_t *u = yuv->u_buffer;
596 uint8_t *v = yuv->v_buffer;
598 for (r = 0; r < yuv->uv_height; ++r) {
599 for (c = 0; c < yuv->uv_width; ++c) {
600 u[c] = UINT8_MAX / 2;
601 v[c] = UINT8_MAX / 2;