From 81374532905b376cead0958c543c431bac530620 Mon Sep 17 00:00:00 2001
From: Clement Courbet <courbet@google.com>
Date: Fri, 20 Mar 2020 09:22:55 +0100
Subject: [PATCH] Optimize vp9_get_sub_block_energy.

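Energy scaling (log, midpoint offset, round, clamp) is a non-decreasing
function of the block variance, so the loop only needs to track the
minimum and maximum variance; the two results are scaled once after the
loop. This avoids costly computations (in particular, log()) within the
loop.
We've measured that we spend 0.8% of our total time computing the log.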

Change-Id: I302fc0ecd9fd8cf96ee9f31b8673e82de1b2b3e2
---
 vp9/encoder/vp9_aq_variance.c | 41 ++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/vp9/encoder/vp9_aq_variance.c b/vp9/encoder/vp9_aq_variance.c
index 1f9ce2354..9e5f3bfb2 100644
--- a/vp9/encoder/vp9_aq_variance.c
+++ b/vp9/encoder/vp9_aq_variance.c
@@ -186,6 +186,17 @@ double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
   return log(var + 1.0);
 }
 
+#define DEFAULT_E_MIDPOINT 10.0
+// Maps a block variance to an energy clamped to ENERGY_MIN..ENERGY_MAX.
+static int scale_block_energy(VP9_COMP *cpi, unsigned int block_var) {
+  // Pass 2 centres on twopass.mb_av_energy; other passes use the default.
+  const double midpoint =
+      (cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
+  const double energy = log(block_var + 1.0) - midpoint;
+  return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
+}
+#undef DEFAULT_E_MIDPOINT
+
 // Get the range of sub block energy values;
 void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
                               int mi_col, BLOCK_SIZE bsize, int *min_e,
@@ -202,31 +213,35 @@ void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
     *min_e = vp9_block_energy(cpi, mb, bsize);
     *max_e = *min_e;
   } else {
-    int energy;
-    *min_e = ENERGY_MAX;
-    *max_e = ENERGY_MIN;
+    unsigned int var;
+    // Because scale_block_energy is non-decreasing, we can find the min/max
+    // block variance and scale afterwards. This avoids a costly scaling at
+    // every iteration.
+    unsigned int min_var = UINT_MAX;
+    unsigned int max_var = 0;
 
     for (y = 0; y < ymis; ++y) {
       for (x = 0; x < xmis; ++x) {
         vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x);
-        energy = vp9_block_energy(cpi, mb, BLOCK_8X8);
-        *min_e = VPXMIN(*min_e, energy);
-        *max_e = VPXMAX(*max_e, energy);
+        vpx_clear_system_state();
+        var = block_variance(cpi, mb, BLOCK_8X8);
+        vpx_clear_system_state();
+        min_var = VPXMIN(min_var, var);
+        max_var = VPXMAX(max_var, var);
       }
     }
+    *min_e = scale_block_energy(cpi, min_var);
+    *max_e = scale_block_energy(cpi, max_var);
   }
 
   // Re-instate source pointers back to what they should have been on entry.
   vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
 }
 
-#define DEFAULT_E_MIDPOINT 10.0
 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
-  double energy;
-  double energy_midpoint;
+  unsigned int var;  // unscaled result of block_variance()
   vpx_clear_system_state();
-  energy_midpoint =
-      (cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT;
-  energy = vp9_log_block_var(cpi, x, bs) - energy_midpoint;
-  return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
+  var = block_variance(cpi, x, bs);
+  vpx_clear_system_state();  // called again before the double math below
+  return scale_block_energy(cpi, var);  // log, midpoint offset, round, clamp
 }
-- 
2.40.0