From ff36b9c78b8a069978c7ba57c31a8b63fbf8b599 Mon Sep 17 00:00:00 2001
From: Jingning Han
Date: Tue, 5 Mar 2019 10:45:16 -0800
Subject: [PATCH] Set up Wiener variance for macroblocks in a frame

This commit introduces a Wiener variance term. For each block in the
source frame, we first estimate its film grain noise level using a
median filter in the transform domain. Each transform coefficient is
then processed with a Wiener filter to account for the impact of film
grain noise on its energy level. The result is the second moment of
the denoised signal.

Change-Id: Ibce7cb1b0cb8fe1aba807d95289712271d576948
---
 vp9/encoder/vp9_encoder.c        | 122 ++++++++++++++++++++++++++++++-
 vp9/encoder/vp9_encoder.h        |   3 +
 vp9/encoder/vp9_speed_features.c |   4 +
 vp9/encoder/vp9_speed_features.h |   3 +
 4 files changed, 128 insertions(+), 4 deletions(-)

diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 57900b8fc..08d9e29e6 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -116,6 +116,13 @@ static int is_spatial_denoise_enabled(VP9_COMP *cpi) {
 }
 #endif
 
+#if CONFIG_VP9_HIGHBITDEPTH
+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                         TX_SIZE tx_size);
+#endif
+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                  TX_SIZE tx_size);
+
 // compute adaptive threshold for skip recoding
 static int compute_context_model_thresh(const VP9_COMP *const cpi) {
   const VP9_COMMON *const cm = &cpi->common;
@@ -977,6 +984,12 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
   vpx_free(cpi->consec_zero_mv);
   cpi->consec_zero_mv = NULL;
 
+  vpx_free(cpi->stack_rank_buffer);
+  cpi->stack_rank_buffer = NULL;
+
+  vpx_free(cpi->mb_wiener_variance);
+  cpi->mb_wiener_variance = NULL;
+
   vp9_free_ref_frame_buffers(cm->buffer_pool);
 #if CONFIG_VP9_POSTPROC
   vp9_free_postproc_buffers(cm);
@@ -2367,6 +2380,14 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   vp9_set_speed_features_framesize_independent(cpi);
   vp9_set_speed_features_framesize_dependent(cpi);
 
+  if (cpi->sf.enable_wiener_variance) {
+    CHECK_MEM_ERROR(cm, cpi->stack_rank_buffer,
+                    vpx_calloc(UINT16_MAX, sizeof(*cpi->stack_rank_buffer)));
+    CHECK_MEM_ERROR(cm, cpi->mb_wiener_variance,
+                    vpx_calloc(cm->mb_rows * cm->mb_cols,
+                               sizeof(*cpi->mb_wiener_variance)));
+  }
+
 #if CONFIG_NON_GREEDY_MV
   cpi->feature_score_loc_alloc = 0;
   cpi->tpl_ready = 0;
@@ -4691,6 +4712,97 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
   }
 }
 
+// Process the Wiener variance on a 16x16 block basis.
+static void set_mb_wiener_variance(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  uint8_t *buffer = cpi->Source->y_buffer;
+  int buf_stride = cpi->Source->y_stride;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  ThreadData *td = &cpi->td;
+  MACROBLOCK *x = &td->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
+  DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]);
+  DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]);
+  uint8_t *zero_pred;
+#else
+  DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]);
+#endif
+
+  DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+  DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
+
+  int mb_row, mb_col;
+  // Hard coded operating block size
+  const int block_size = 16;
+  const int coeff_count = block_size * block_size;
+  const TX_SIZE tx_size = TX_16X16;
+
+  if (cpi->sf.enable_wiener_variance == 0) return;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+    zero_pred = CONVERT_TO_BYTEPTR(zero_pred16);
+  else
+    zero_pred = zero_pred8;
+#endif
+
+  memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count);
+
+  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
+    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
+      int idx, hist_count = 0;
+      int16_t median_val = 0;
+      uint8_t *mb_buffer =
+          buffer + mb_row * block_size * buf_stride + mb_col * block_size;
+      int64_t wiener_variance = 0;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+      if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+        vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size,
+                                  mb_buffer, buf_stride, zero_pred, block_size,
+                                  xd->bd);
+        highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+      } else {
+        vpx_subtract_block(block_size, block_size, src_diff, block_size,
+                           mb_buffer, buf_stride, zero_pred, block_size);
+        wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+      }
+#else
+      vpx_subtract_block(block_size, block_size, src_diff, block_size,
+                         mb_buffer, buf_stride, zero_pred, block_size);
+      wht_fwd_txfm(src_diff, block_size, coeff, tx_size);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+      for (idx = 0; idx < UINT16_MAX; ++idx) cpi->stack_rank_buffer[idx] = 0;
+
+      for (idx = 0; idx < coeff_count; ++idx)
+        ++cpi->stack_rank_buffer[abs(coeff[idx])];
+
+      for (idx = 0; idx < UINT16_MAX; ++idx) {
+        hist_count += cpi->stack_rank_buffer[idx];
+        if (hist_count >= coeff_count / 2) break;
+      }
+
+      // Noise level estimation
+      median_val = idx;
+
+      // Wiener filter
+      for (idx = 1; idx < coeff_count; ++idx) {
+        int sign = coeff[idx] < 0;
+        int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx];
+        coeff[idx] = (int16_t)((sqr_coeff * coeff[idx]) /
+                               (sqr_coeff + (int64_t)median_val * median_val));
+        if (sign) coeff[idx] = -coeff[idx];
+
+        wiener_variance += (int64_t)coeff[idx] * coeff[idx];
+      }
+      cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] =
+          wiener_variance / coeff_count;
+    }
+  }
+}
+
 static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
                                       uint8_t *dest,
                                       unsigned int *frame_flags) {
@@ -4777,6 +4889,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
     }
   }
 
+  set_mb_wiener_variance(cpi);
+
   vpx_clear_system_state();
 
 #if CONFIG_INTERNAL_STATS
@@ -5827,8 +5941,8 @@ static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
 }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-static void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
-                                TX_SIZE tx_size) {
+void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                         TX_SIZE tx_size) {
   // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
   switch (tx_size) {
     case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
@@ -5839,8 +5953,8 @@ static void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
-                         TX_SIZE tx_size) {
+void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+                  TX_SIZE tx_size) {
   switch (tx_size) {
     case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
     case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h
index d1a782bfb..847290556 100644
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -628,6 +628,9 @@ typedef struct VP9_COMP {
   int ext_refresh_frame_context_pending;
   int ext_refresh_frame_context;
 
+  int64_t *mb_wiener_variance;
+  int *stack_rank_buffer;
+
   YV12_BUFFER_CONFIG last_frame_uf;
 
   TOKENEXTRA *tile_tok[4][1 << 6];
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 8c59662c3..c8e5795ee 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -939,6 +939,10 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   sf->tx_size_search_breakout = 1;
   sf->tx_size_search_depth = 2;
 
+  // Manually turn this on during experimentation. Off by default so it has
+  // no effect on the baseline encoder.
+  sf->enable_wiener_variance = 0;
+
   sf->exhaustive_searches_thresh =
       (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
                                                                : INT_MAX;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 8609c9837..4261fca14 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -288,6 +288,9 @@ typedef struct SPEED_FEATURES {
   // level within a frame.
   int allow_skip_recode;
 
+  // Enable Wiener filter based block complexity analysis.
+  int enable_wiener_variance;
+
   // Coefficient probability model approximation step size
   int coeff_prob_appx_step;
 
-- 
2.40.0
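
Illustrative sketch (not part of the patch): the per-block arithmetic inside
set_mb_wiener_variance() can be read as the standalone C function below. It
assumes the 256 Hadamard coefficients of one 16x16 source block are already
available (the patch obtains them via vpx_subtract_block() against a zero
predictor followed by wht_fwd_txfm()). The noise level is estimated as the
median absolute coefficient via a counting histogram (cpi->stack_rank_buffer
in the patch), each AC coefficient is scaled by the Wiener gain
c^2 / (c^2 + median^2), and the average energy of the filtered coefficients
is returned. The function name block_wiener_variance and the magnitude clamp
are additions for this sketch, and the patch's sign handling is dropped here
because only squared values are accumulated.

#include <stdint.h>
#include <stdlib.h>

#define COEFF_COUNT (16 * 16) /* one 16x16 block, TX_16X16 */

static int64_t block_wiener_variance(const int32_t coeff[COEFF_COUNT]) {
  /* Counting histogram of coefficient magnitudes; the patch keeps this in
   * the preallocated cpi->stack_rank_buffer (UINT16_MAX entries). */
  int *hist = (int *)calloc(UINT16_MAX, sizeof(*hist));
  int idx, hist_count = 0;
  int64_t median_val, wiener_variance = 0;

  if (hist == NULL) return 0;

  for (idx = 0; idx < COEFF_COUNT; ++idx) {
    int mag = abs(coeff[idx]);
    /* Clamp added for this sketch; the patch indexes the histogram with the
     * raw magnitude. */
    if (mag >= UINT16_MAX) mag = UINT16_MAX - 1;
    ++hist[mag];
  }

  /* Walk the histogram until half the coefficients are covered: the index
   * reached is the median absolute coefficient, used as the film grain
   * noise level estimate. */
  for (idx = 0; idx < UINT16_MAX; ++idx) {
    hist_count += hist[idx];
    if (hist_count >= COEFF_COUNT / 2) break;
  }
  median_val = idx;
  free(hist);

  /* Wiener filter: shrink each AC coefficient (index 0, the DC term, is
   * skipped as in the patch) by c^2 / (c^2 + median^2). For example, with
   * |c| = 10 and median = 4 the filtered magnitude is
   * 100 * 10 / (100 + 16) = 8 in integer arithmetic. */
  for (idx = 1; idx < COEFF_COUNT; ++idx) {
    const int64_t mag = abs(coeff[idx]);
    const int64_t sqr_coeff = mag * mag;
    const int64_t filtered =
        (sqr_coeff * mag) / (sqr_coeff + median_val * median_val);
    wiener_variance += filtered * filtered;
  }

  /* Second moment of the denoised block, stored per macroblock in
   * cpi->mb_wiener_variance by the patch. */
  return wiener_variance / COEFF_COUNT;
}

By construction, coefficients at or below the estimated noise level receive a
gain of 1/2 or less and contribute little energy, while coefficients well
above it pass nearly unchanged, so the stored value approximates the block's
energy after the film grain contribution is removed.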