From: Debargha Mukherjee <debargha@google.com>
Date: Tue, 21 Apr 2015 16:29:56 +0000 (-0700)
Subject: Some minor improvements in bilateral filter expt.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=425a45a45cf4293ed512f8ea52ddf21569508ac4;p=libvpx

Some minor improvements in bilateral filter expt.

Changes include:

* Use double for RD cost computation to guard against overflow
for large-resolution frames.
* Code the filter level relative to the previous frame's level, so that
an unchanged level costs only a single bit.
* Change precision of the filter parameters from 1/8 to 1/16.
* Allow spatial variance for x and y to be different.

Change-Id: I1669f65eb0ab1e8519962954c92d59e04f1277b7
derflr: +0.556% (a little up from before)
---

diff --git a/vp9/common/vp9_entropymode.c b/vp9/common/vp9_entropymode.c
index 49c07a264..b2799b3ae 100644
--- a/vp9/common/vp9_entropymode.c
+++ b/vp9/common/vp9_entropymode.c
@@ -1257,6 +1257,10 @@ void vp9_setup_past_independence(VP9_COMMON *cm) {
 
   // To force update of the sharpness
   lf->last_sharpness_level = -1;
+#if CONFIG_LOOP_POSTFILTER
+  lf->bilateral_level = 0;
+  lf->last_bilateral_level = 0;
+#endif
 
   vp9_default_coef_probs(cm);
   vp9_init_mode_probs(&cm->fc);
diff --git a/vp9/common/vp9_loopfilter.c b/vp9/common/vp9_loopfilter.c
index d086c4fcc..e8b734e36 100644
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -242,7 +242,7 @@ int vp9_bilateral_level_bits(const VP9_COMMON *const cm) {
 
 int vp9_loop_bilateral_used(int level, int kf) {
   const bilateral_params_t param = vp9_bilateral_level_to_params(level, kf);
-  return (param.sigma_x && param.sigma_r);
+  return (param.sigma_x && param.sigma_y && param.sigma_r);
 }
 
 void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
@@ -250,11 +250,14 @@ void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
   lfi->bilateral_used = vp9_loop_bilateral_used(level, kf);
   if (lfi->bilateral_used) {
     if (param.sigma_x != lfi->bilateral_sigma_x_set ||
+        param.sigma_y != lfi->bilateral_sigma_y_set ||
         param.sigma_r != lfi->bilateral_sigma_r_set) {
       const int sigma_x = param.sigma_x;
+      const int sigma_y = param.sigma_y;
       const int sigma_r = param.sigma_r;
       const double sigma_r_d = (double)sigma_r / BILATERAL_PRECISION;
       const double sigma_x_d = (double)sigma_x / BILATERAL_PRECISION;
+      const double sigma_y_d = (double)sigma_y / BILATERAL_PRECISION;
       double *wr_lut_ = lfi->wr_lut + 255;
       double *wx_lut_ = lfi->wx_lut + BILATERAL_HALFWIN * (1 + BILATERAL_WIN);
       int i, x, y;
@@ -265,9 +268,11 @@ void vp9_loop_bilateral_init(loop_filter_info_n *lfi, int level, int kf) {
       for (y = -BILATERAL_HALFWIN; y <= BILATERAL_HALFWIN; y++)
         for (x = -BILATERAL_HALFWIN; x <= BILATERAL_HALFWIN; x++) {
           wx_lut_[y * BILATERAL_WIN + x] =
-              exp(-(x * x + y * y) / (2 * sigma_x_d * sigma_x_d));
+              exp(-(x * x) / (2 * sigma_x_d * sigma_x_d) -
+                   (y * y) / (2 * sigma_y_d * sigma_y_d));
         }
       lfi->bilateral_sigma_x_set = sigma_x;
+      lfi->bilateral_sigma_y_set = sigma_y;
       lfi->bilateral_sigma_r_set = sigma_r;
     }
   }
diff --git a/vp9/common/vp9_loopfilter.h b/vp9/common/vp9_loopfilter.h
index 78aea1108..3301035dc 100644
--- a/vp9/common/vp9_loopfilter.h
+++ b/vp9/common/vp9_loopfilter.h
@@ -38,49 +38,50 @@ struct VP9Common;
 #define BILATERAL_LEVELS        (1 << BILATERAL_LEVEL_BITS)
 #define DEF_BILATERAL_LEVEL     2
 
-#define BILATERAL_PRECISION     8
+#define BILATERAL_PRECISION     16
 #define BILATERAL_HALFWIN       3
 #define BILATERAL_WIN           (2 * BILATERAL_HALFWIN + 1)
 
 typedef struct bilateral_params {
-  int sigma_x;  // spatial variance
+  int sigma_x;  // spatial variance x
+  int sigma_y;  // spatial variance y
   int sigma_r;  // range variance
 } bilateral_params_t;
 
 static bilateral_params_t
     bilateral_level_to_params_arr[BILATERAL_LEVELS + 1] = {
-  // Values are rounded to 1/8 th precision
-  {0, 0},    // 0 - default
-  {4, 16},
-  {5, 16},
-  {6, 16},
-  {7, 16},
-  {9, 18},
-  {12, 20},
-  {16, 20},
-  {20, 20},
+  // Values are rounded to 1/16 th precision
+  {0, 0, 0},    // 0 - default
+  {8, 9, 30},
+  {9, 8, 30},
+  {9, 11, 32},
+  {11, 9, 32},
+  {14, 14, 32},
+  {18, 18, 36},
+  {24, 24, 40},
+  {32, 32, 40},
 };
 
 static bilateral_params_t
     bilateral_level_to_params_arr_kf[BILATERAL_LEVELS_KF + 1] = {
-  // Values are rounded to 1/8 th precision
-  {0, 0},    // 0 - default
-  {4, 16},
-  {5, 16},
-  {6, 16},
-  {7, 16},
-  {9, 18},
-  {12, 20},
-  {15, 22},
-  {18, 24},
-  {21, 24},
-  {24, 24},
-  {24, 28},
-  {28, 24},
-  {28, 28},
-  {28, 32},
-  {32, 24},
-  {32, 28},
+  // Values are rounded to 1/16 th precision
+  {0, 0, 0},    // 0 - default
+  {8, 8, 30},
+  {9, 9, 32},
+  {10, 10, 32},
+  {12, 12, 32},
+  {14, 14, 32},
+  {18, 18, 36},
+  {24, 24, 40},
+  {30, 30, 44},
+  {36, 36, 48},
+  {42, 42, 48},
+  {48, 48, 48},
+  {48, 48, 56},
+  {56, 56, 48},
+  {56, 56, 56},
+  {56, 56, 64},
+  {64, 64, 48},
 };
 
 int vp9_bilateral_level_bits(const struct VP9Common *const cm);
@@ -112,6 +113,7 @@ struct loopfilter {
 
 #if CONFIG_LOOP_POSTFILTER
   int bilateral_level;
+  int last_bilateral_level;
 #endif
 };
 
@@ -130,6 +132,7 @@ typedef struct {
   double wx_lut[BILATERAL_WIN * BILATERAL_WIN];
   double wr_lut[512];
   int bilateral_sigma_x_set;
+  int bilateral_sigma_y_set;
   int bilateral_sigma_r_set;
   int bilateral_used;
 #endif
diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c
index 8fd4a3e9d..b8d740eda 100644
--- a/vp9/decoder/vp9_decodeframe.c
+++ b/vp9/decoder/vp9_decodeframe.c
@@ -1631,9 +1631,15 @@ static void setup_loopfilter(VP9_COMMON *cm,
 #if CONFIG_LOOP_POSTFILTER
   lf->bilateral_level = vp9_rb_read_bit(rb);
   if (lf->bilateral_level) {
-    lf->bilateral_level += vp9_rb_read_literal(
-        rb, vp9_bilateral_level_bits(cm));
+    int level = vp9_rb_read_literal(rb, vp9_bilateral_level_bits(cm));
+    lf->bilateral_level = level + (level >= lf->last_bilateral_level);
+  } else {
+    lf->bilateral_level = lf->last_bilateral_level;
   }
+  if (cm->frame_type != KEY_FRAME)
+    cm->lf.last_bilateral_level = cm->lf.bilateral_level;
+  else
+    cm->lf.last_bilateral_level = 0;
 #endif  // CONFIG_LOOP_POSTFILTER
 }
 
@@ -2377,6 +2383,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
       }
     }
   }
+
 #if CONFIG_VP9_HIGHBITDEPTH
   get_frame_new_buffer(cm)->bit_depth = cm->bit_depth;
 #endif
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c
index 8f9e01634..1d5fb43af 100644
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1527,10 +1527,13 @@ static void encode_loopfilter(VP9_COMMON *cm,
     }
   }
 #if CONFIG_LOOP_POSTFILTER
-  vp9_wb_write_bit(wb, lf->bilateral_level > 0);
-  if (lf->bilateral_level > 0)
-    vp9_wb_write_literal(wb, lf->bilateral_level - 1,
+  vp9_wb_write_bit(wb, lf->bilateral_level != lf->last_bilateral_level);
+  if (lf->bilateral_level != lf->last_bilateral_level) {
+    int level = lf->bilateral_level -
+                (lf->bilateral_level > lf->last_bilateral_level);
+    vp9_wb_write_literal(wb, level,
                          vp9_bilateral_level_bits(cm));
+  }
 #endif  // CONFIG_LOOP_POSTFILTER
 }
 
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index a822b3e8b..f5a462790 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3423,6 +3423,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
   // Pick the loop filter level for the frame.
   loopfilter_frame(cpi, cm);
 
+  // printf("Bilateral level: %d\n", cm->lf.bilateral_level);
+
   // build the bitstream
   vp9_pack_bitstream(cpi, dest, size);
 
@@ -3463,6 +3465,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
 
   cm->last_frame_type = cm->frame_type;
 
+#if CONFIG_LOOP_POSTFILTER
+  if (cm->frame_type != KEY_FRAME)
+    cm->lf.last_bilateral_level = cm->lf.bilateral_level;
+  else
+    cm->lf.last_bilateral_level = 0;
+#endif
+
   if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
     vp9_rc_postencode_update(cpi, *size);
 
diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c
index 1da6c32e5..e225bd9a6 100644
--- a/vp9/encoder/vp9_picklpf.c
+++ b/vp9/encoder/vp9_picklpf.c
@@ -87,15 +87,16 @@ static int try_bilateral_frame(const YV12_BUFFER_CONFIG *sd,
 static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
                                       VP9_COMP *cpi,
                                       int filter_level, int partial_frame,
-                                      int64_t *best_cost_ret) {
+                                      double *best_cost_ret) {
   VP9_COMMON *const cm = &cpi->common;
   int i, bilateral_best, err;
-  int64_t best_cost;
-  int64_t cost[BILATERAL_LEVELS_KF];
+  double best_cost;
+  double cost[BILATERAL_LEVELS_KF];
   const int bilateral_level_bits = vp9_bilateral_level_bits(&cpi->common);
   const int bilateral_levels = 1 << bilateral_level_bits;
 #ifdef USE_RD_LOOP_POSTFILTER_SEARCH
   MACROBLOCK *x = &cpi->mb;
+  int bits;
 #endif
 
   //  Make a copy of the unfiltered / processed recon buffer
@@ -107,9 +108,10 @@ static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
   bilateral_best = 0;
   err = try_bilateral_frame(sd, cpi, 0, partial_frame);
 #ifdef USE_RD_LOOP_POSTFILTER_SEARCH
-  cost[0] = RDCOST(x->rdmult, x->rddiv, 0, err);
+  bits = cm->lf.last_bilateral_level == 0 ? 0 : bilateral_level_bits;
+  cost[0] = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err);
 #else
-  cost[0] = err;
+  cost[0] = (double)err;
 #endif
   best_cost = cost[0];
   for (i = 1; i <= bilateral_levels; ++i) {
@@ -118,10 +120,10 @@ static int64_t search_bilateral_level(const YV12_BUFFER_CONFIG *sd,
     // Normally the rate is rate in bits * 256 and dist is sum sq err * 64
     // when RDCOST is used.  However below we just scale both in the correct
     // ratios appropriately but not exactly by these values.
-    cost[i] = RDCOST(x->rdmult, x->rddiv,
-                     bilateral_level_bits << 2, err);
+    bits = cm->lf.last_bilateral_level == i ? 0 : bilateral_level_bits;
+    cost[i] = RDCOST_DBL(x->rdmult, x->rddiv, (bits << 2), err);
 #else
-    cost[i] = err;
+    cost[i] = (double)err;
 #endif
     if (cost[i] < best_cost) {
       bilateral_best = i;
@@ -144,18 +146,19 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
   const int max_filter_level = get_max_filter_level(cpi);
   int filt_direction = 0;
   int filt_best, bilateral_best;
-  int64_t best_err;
+  double best_err;
+  int i;
 
   // Start the search at the previous frame filter level unless it is now out of
   // range.
   int filt_mid = clamp(lf->filter_level, min_filter_level, max_filter_level);
   int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
-  // Sum squared error at each filter level
-  int64_t ss_err[MAX_LOOP_FILTER + 1];
+  double ss_err[MAX_LOOP_FILTER + 1];
   int bilateral;
 
   // Set each entry to -1
-  vpx_memset(ss_err, 0xFF, sizeof(ss_err));
+  for (i = 0; i <= MAX_LOOP_FILTER; ++i)
+    ss_err[i] = -1.0;
 
   bilateral = search_bilateral_level(sd, cpi, filt_mid,
                                      partial_frame, &best_err);
@@ -168,14 +171,14 @@ static int search_filter_bilateral_level(const YV12_BUFFER_CONFIG *sd,
     const int filt_low = MAX(filt_mid - filter_step, min_filter_level);
 
     // Bias against raising loop filter in favor of lowering it.
-    int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
+    double bias = (best_err / (1 << (15 - (filt_mid / 8)))) * filter_step;
 
     if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20))
       bias = (bias * cpi->twopass.section_intra_rating) / 20;
 
     // yx, bias less for large block size
     if (cm->tx_mode != ONLY_4X4)
-      bias >>= 1;
+      bias /= 2;
 
     if (filt_direction <= 0 && filt_low != filt_mid) {
       // Get Low filter error score
diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h
index 6383f556b..b20f84332 100644
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -25,10 +25,10 @@ extern "C" {
 #define RDDIV_BITS          7
 
 #define RDCOST(RM, DM, R, D) \
-  (((128 + ((int64_t)R) * (RM)) >> 8) + (D << DM))
+  (((128 + ((int64_t)R) * (RM)) >> 8) + ((D) << (DM)))
 
 #define RDCOST_DBL(RM, DM, R, D) \
-  (((((double)R) * (RM)) / 256.0) + ((double)D  * (1 << DM)))
+  (((((double)(R)) * (RM)) / 256.0) + ((double)(D)  * (1 << (DM))))
 
 #define QIDX_SKIP_THRESH     115