From 635ae8bdc1f5f1fe9e94c2f14144ad4c8750b627 Mon Sep 17 00:00:00 2001
From: paulwilkins <paulwilkins@google.com>
Date: Thu, 18 Aug 2016 14:15:25 +0100
Subject: [PATCH] Adjust  coefficient optimization  and tx_domain rd speed
 features.

Previously Tx domain rd was used in all cases above speed 0.
Coefficient optimization was only enabled for best and speed 0.

This patch selectively sets these features at other speed settings
based on block complexity.

For the Netflix and HD sets in particular the quality gains are
large compared to the speed hit. At speed 1 the average psnr
gain in the NF set  is > 2.5% with one clip coming in at 18%
and some points almost 30%.  Average gains for the lower
resolution test sets are around 1%.

The gains are biggest at low Q so some further optimization
may be possible.

Change-Id: I340376c7b2a78e5389a34b7ebdc41072808d0576
---
 vp9/encoder/vp9_block.h          |  2 ++
 vp9/encoder/vp9_encodeframe.c    | 16 +++++++++++++++
 vp9/encoder/vp9_rdopt.c          | 12 +++++------
 vp9/encoder/vp9_speed_features.c | 35 +++++++++++++++++++++++---------
 vp9/encoder/vp9_speed_features.h |  6 ++++--
 5 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h
index b0f4fc7c3..1ea5fdf1f 100644
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -71,6 +71,8 @@ struct macroblock {
   int skip_recode;
   int skip_optimize;
   int q_index;
+  int block_qcoeff_opt;
+  int block_tx_domain;
 
   // The equivalent error at the current rdmult of one whole bit (not one
   // bitcost unit).
diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index ba25ca26b..e1f38e9e4 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1336,6 +1336,22 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
   // Save rdmult before it might be changed, so it can be restored later.
   orig_rdmult = x->rdmult;
 
+  if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) {
+    double logvar = vp9_log_block_var(cpi, x, bsize);
+    // Check block complexity as part of descision on using pixel or transform
+    // domain distortion in rd tests.
+    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion &&
+                         (logvar >= cpi->sf.tx_domain_thresh);
+
+    // Check block complexity as part of descision on using quantized
+    // coefficient optimisation inside the rd loop.
+    x->block_qcoeff_opt =
+        cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh);
+  } else {
+    x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion;
+    x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
+  }
+
   if (aq_mode == VARIANCE_AQ) {
     const int energy =
         bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c
index 8cb90e893..50b365e5d 100644
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -535,7 +535,7 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
   const struct macroblock_plane *const p = &x->plane[plane];
   const struct macroblockd_plane *const pd = &xd->plane[plane];
 
-  if (cpi->sf.txfm_domain_distortion) {
+  if (x->block_tx_domain) {
     const int ss_txfrm_size = tx_size << 1;
     int64_t this_sse;
     const int shift = tx_size == TX_32X32 ? 0 : 2;
@@ -663,11 +663,11 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   if (args->exit_early) return;
 
   if (!is_inter_block(mi)) {
-    struct encode_b_args intra_arg = { x, args->cpi->sf.quant_coeff_opt,
-                                       args->t_above, args->t_left, &mi->skip };
+    struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above,
+                                       args->t_left, &mi->skip };
     vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size,
                            &intra_arg);
-    if (args->cpi->sf.txfm_domain_distortion) {
+    if (x->block_tx_domain) {
       dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
     } else {
@@ -697,7 +697,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
         SKIP_TXFM_NONE) {
       // full forward transform and quantization
       vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
-      if (args->cpi->sf.quant_coeff_opt)
+      if (x->block_qcoeff_opt)
         vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
       dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                  tx_size, &dist, &sse);
@@ -731,7 +731,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
   } else {
     // full forward transform and quantization
     vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
-    if (args->cpi->sf.quant_coeff_opt)
+    if (x->block_qcoeff_opt)
       vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
     dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
                tx_size, &dist, &sse);
diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c
index 4e288287a..8dbadcae1 100644
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -137,6 +137,9 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
   }
 }
 
+static double tx_dom_thresholds[6] = { 99.0, 14.0, 12.0, 8.0, 4.0, 0.0 };
+static double qopt_thresholds[6] = { 99.0, 12.0, 10.0, 4.0, 2.0, 0.0 };
+
 static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
                                    SPEED_FEATURES *sf, int speed) {
   const int boosted = frame_is_boosted(cpi);
@@ -151,14 +154,24 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
   sf->use_square_only_threshold = BLOCK_16X16;
 
   if (speed >= 1) {
-    if ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
-        vp9_internal_image_edge(cpi)) {
-      sf->use_square_partition_only = !frame_is_boosted(cpi);
+    if (cpi->oxcf.pass == 2) {
+      TWO_PASS *const twopass = &cpi->twopass;
+      if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
+          vp9_internal_image_edge(cpi)) {
+        sf->use_square_partition_only = !frame_is_boosted(cpi);
+      } else {
+        sf->use_square_partition_only = !frame_is_intra_only(cm);
+      }
     } else {
       sf->use_square_partition_only = !frame_is_intra_only(cm);
     }
-    sf->use_square_only_threshold = BLOCK_4X4;
 
+    sf->allow_txfm_domain_distortion = 1;
+    sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
+    sf->allow_quant_coeff_opt = sf->optimize_coefficients;
+    sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
+
+    sf->use_square_only_threshold = BLOCK_4X4;
     sf->less_rectangular_check = 1;
 
     sf->use_rd_breakout = 1;
@@ -174,8 +187,6 @@ static void set_good_speed_feature(VP9_COMP *cpi, VP9_COMMON *cm,
     sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V;
     sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
     sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V;
-    sf->txfm_domain_distortion = 1;
-    sf->quant_coeff_opt = 0;
   }
 
   if (speed >= 2) {
@@ -294,8 +305,10 @@ static void set_rt_speed_feature(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed,
   sf->exhaustive_searches_thresh = INT_MAX;
 
   if (speed >= 1) {
-    sf->txfm_domain_distortion = 1;
-    sf->quant_coeff_opt = 0;
+    sf->allow_txfm_domain_distortion = 1;
+    sf->tx_domain_thresh = 0.0;
+    sf->allow_quant_coeff_opt = 0;
+    sf->quant_opt_thresh = 0.0;
     sf->use_square_partition_only = !frame_is_intra_only(cm);
     sf->less_rectangular_check = 1;
     sf->tx_size_search_method =
@@ -566,8 +579,10 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
   sf->disable_filter_search_var_thresh = 0;
   sf->adaptive_interp_filter_search = 0;
   sf->allow_partition_search_skip = 0;
-  sf->txfm_domain_distortion = 0;
-  sf->quant_coeff_opt = sf->optimize_coefficients;
+  sf->allow_txfm_domain_distortion = 0;
+  sf->tx_domain_thresh = 99.0;
+  sf->allow_quant_coeff_opt = sf->optimize_coefficients;
+  sf->quant_opt_thresh = 99.0;
 
   for (i = 0; i < TX_SIZES; i++) {
     sf->intra_y_mode_mask[i] = INTRA_ALL;
diff --git a/vp9/encoder/vp9_speed_features.h b/vp9/encoder/vp9_speed_features.h
index 573a38e66..41691f654 100644
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -239,11 +239,13 @@ typedef struct SPEED_FEATURES {
   int coeff_prob_appx_step;
 
   // Enable uniform quantizer followed by trellis coefficient optimization
-  int quant_coeff_opt;
+  int allow_quant_coeff_opt;
+  double quant_opt_thresh;
 
   // Use transform domain distortion. Use pixel domain distortion in speed 0
   // and certain situations in higher speed to improve the RD model precision.
-  int txfm_domain_distortion;
+  int allow_txfm_domain_distortion;
+  double tx_domain_thresh;
 
   // The threshold is to determine how slow the motino is, it is used when
   // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION
-- 
2.40.0