Add simple version of activity masking.

author Timothy B. Terriberry <tterribe@xiph.org>

Mon, 11 Oct 2010 21:37:27 +0000 (14:37 -0700)

committer John Koleszar <jkoleszar@google.com>

Tue, 12 Oct 2010 12:41:03 +0000 (08:41 -0400)
author Timothy B. Terriberry <tterribe@xiph.org>
Mon, 11 Oct 2010 21:37:27 +0000 (14:37 -0700)
committer John Koleszar <jkoleszar@google.com>
Tue, 12 Oct 2010 12:41:03 +0000 (08:41 -0400)
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h

index ffb88904e7e8d2e8b7d5572a5ac2b4248460c8f9..fc6561829f89260b884d7cec65e6c3d4aa1ca858 100644 (file)
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -81,6 +81,7 @@ typedef struct
      int errthresh;
      int rddiv;
      int rdmult;
+    INT64 activity_sum;
  
      int mvcosts[2][MVvals+1];
      int *mvcost[2];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c

index efbe2365b633d5cc883ce6f33cca44674c31be79..b0a3c4713f4c306e3afa87ffba53322bf6b610b1 100644 (file)
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -369,6 +369,62 @@ void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
  }
  
  
+/* activity_avg must be positive, or flat regions could get a zero weight
+ *  (infinite lambda), which confounds analysis.
+ * This also avoids the need for divide by zero checks in
+ *  vp8_activity_masking().
+ */
+#define VP8_ACTIVITY_AVG_MIN (64)
+
+/* This is used as a reference when computing the source variance for the
+ *  purposes of activity masking.
+ * Eventually this should be replaced by custom no-reference routines,
+ *  which will be faster.
+ */
+static const unsigned char VP8_VAR_OFFS[16]=
+{
+    128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
+};
+
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
+{ /* Derives an activity value from the MB's luma, rescales x->rdmult by it, and returns it. */
+    unsigned int act;
+    unsigned int sse;
+    int sum;
+    unsigned int a;
+    unsigned int b;
+    unsigned int d; /* NOTE(review): never referenced in this function. */
+    /* TODO: This could also be done over smaller areas (8x8), but that would
+     *  require extensive changes elsewhere, as lambda is assumed to be fixed
+     *  over an entire MB in most of the code.
+     * Another option is to compute four 8x8 variances, and pick a single
+     *  lambda using a non-linear combination (e.g., the smallest, or second
+     *  smallest, etc.).
+     */
+    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
+     x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
+    /* 256*sse - sum*sum; this requires a full 32 bits of precision. */
+    act = (sse<<8) - sum*sum;
+    /* Drop 4 bits (with rounding) to give us some headroom to work with. */
+    act = (act + 8) >> 4;
+    /* If the region is flat (act < 8<<12), cap the activity at 5<<12. */
+    if (act < 8<<12)
+        act = act < 5<<12 ? act : 5<<12;
+    /* TODO: For non-flat regions, edge regions should receive less masking
+     *  than textured regions, but identifying edge regions quickly and
+     *  reliably enough is still a subject of experimentation.
+     * This will be most noticeable near edges with a complex shape (e.g.,
+     *  text), but the 4x4 transform size should make this less of a problem
+     *  than it would be for an 8x8 transform.
+     */
+    /* Apply the masking to the RD multiplier: scale it by b/a, rounded. */
+    a = act + 4*cpi->activity_avg;
+    b = 4*act + cpi->activity_avg;
+    x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
+    return act;
+}
+
+
  
  static
  void encode_mb_row(VP8_COMP *cpi,
@@ -380,6 +436,7 @@ void encode_mb_row(VP8_COMP *cpi,
                     int *segment_counts,
                     int *totalrate)
  {
+    INT64 activity_sum = 0;
      int i;
      int recon_yoffset, recon_uvoffset;
      int mb_col;
@@ -431,6 +488,11 @@ void encode_mb_row(VP8_COMP *cpi,
          xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
          xd->left_available = (mb_col != 0);
  
+        x->rddiv = cpi->RDDIV;
+        x->rdmult = cpi->RDMULT;
+
+        activity_sum += vp8_activity_masking(cpi, x);
+
          // Is segmentation enabled
          // MB level adjutment to quantizer
          if (xd->segmentation_enabled)
@@ -537,6 +599,7 @@ void encode_mb_row(VP8_COMP *cpi,
      // this is to account for the border
      xd->mode_info_context++;
      x->partition_info++;
+    x->activity_sum += activity_sum;
  }
  
  
@@ -653,8 +716,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
  
      vp8_setup_block_ptrs(x);
  
-    x->rddiv = cpi->RDDIV;
-    x->rdmult = cpi->RDMULT;
+    x->activity_sum = 0;
  
  #if 0
      // Experimental rd code
@@ -709,11 +771,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
          else
          {
  #if CONFIG_MULTITHREAD
+            int i;
+
              vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
  
              for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
              {
-                int i;
                  cpi->current_mb_col_main = -1;
  
                  for (i = 0; i < cpi->encoding_thread_count; i++)
@@ -791,6 +854,11 @@ void vp8_encode_frame(VP8_COMP *cpi)
                  totalrate += cpi->mb_row_ei[i].totalrate;
              }
  
+            for (i = 0; i < cpi->encoding_thread_count; i++)
+            {
+                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
+            }
+
  #endif
  
          }
@@ -926,6 +994,14 @@ void vp8_encode_frame(VP8_COMP *cpi)
      cpi->last_frame_distortion = cpi->frame_distortion;
  #endif
  
+    /* Update the average activity for the next frame.
+     * This is feed-forward for now; it could also be saved in two-pass, or
+     *  done during lookahead when that is eventually added.
+     */
+    cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
+    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
+        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
+
  }
  void vp8_setup_block_ptrs(MACROBLOCK *x)
  {
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c

index af80857d231063e747919e2a4d9f2da882d26c0e..820b85b1145ea04a9e7d88e02e6d8b7bd5003c4c 100644 (file)
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -109,7 +109,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
  
  #if !(CONFIG_REALTIME_ONLY)
  #if 1
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize)
          vp8_optimize_mby(x, rtcd);
  
  #endif
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c

index 63cdf3c1a35db6b41991b51f6a0e843b0ac46b8c..a37188bf2887c910a63cb7498cc2f7d2293bfbd7 100644 (file)
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -662,7 +662,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
      vp8_quantize_mb(x);
  
  #if !(CONFIG_REALTIME_ONLY)
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize)
      {
          vp8_optimize_mb(x, rtcd);
          vp8_find_mb_skip_coef(x);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c

index 962e741741fed584143b7d3f4754d82c5f538eba..3646375eda2657d3b873f199c0dd7747e211b378 100644 (file)
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -61,6 +61,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                      int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
                      int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
                      volatile int *last_row_current_mb_col;
+                    INT64 activity_sum = 0;
  
                      if (ithread > 0)
                          last_row_current_mb_col = &cpi->mb_row_ei[ithread-1].current_mb_col;
@@ -111,6 +112,11 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                          xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
                          xd->left_available = (mb_col != 0);
  
+                        x->rddiv = cpi->RDDIV;
+                        x->rdmult = cpi->RDMULT;
+
+                        activity_sum += vp8_activity_masking(cpi, x);
+
                          // Is segmentation enabled
                          // MB level adjutment to quantizer
                          if (xd->segmentation_enabled)
@@ -197,6 +203,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                      // this is to account for the border
                      xd->mode_info_context++;
                      x->partition_info++;
+                    x->activity_sum += activity_sum;
  
                      x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                      x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
@@ -240,8 +247,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
      z->sadperbit16      = x->sadperbit16;
      z->sadperbit4       = x->sadperbit4;
      z->errthresh        = x->errthresh;
-    z->rddiv            = x->rddiv;
-    z->rdmult           = x->rdmult;
  
      /*
      z->mv_col_min    = x->mv_col_min;
@@ -392,8 +397,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
  
          vp8_setup_block_ptrs(mb);
  
-        mb->rddiv = cpi->RDDIV;
-        mb->rdmult = cpi->RDMULT;
+        mb->activity_sum = 0;
  
          mbd->left_context = &cm->left_context;
          mb->mvc = cm->fc.mvc;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c

index 34af6ea8ee21cf69de7b3eaff60ab07451f53271..08c7ad46e81b03d36936ade768360bd141197382 100644 (file)
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2187,6 +2187,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
      init_context_counters();
  #endif
  
+    /*Initialize the feed-forward activity masking.*/
+    cpi->activity_avg = 90<<12;
  
      cpi->frames_since_key = 8;        // Give a sensible default for the first frame.
      cpi->key_frame_frequency = cpi->oxcf.key_freq;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h

index cb768c01840d2e4bf18876adf20b5df75e121416..3f2d920c9eaf2135242587c338bd6f9fa47e6b7e 100644 (file)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -308,6 +308,7 @@ typedef struct
      int mvcostmultiplier;
      int subseqblockweight;
      int errthresh;
+    unsigned int activity_avg;
  
      int RDMULT;
      int RDDIV ;
@@ -659,6 +660,8 @@ void vp8_encode_frame(VP8_COMP *cpi);
  
  void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size);
  
+unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x);
+
  int rd_cost_intra_mb(MACROBLOCKD *x);
  
  void vp8_tokenize_mb(VP8_COMP *, MACROBLOCKD *, TOKENEXTRA **);
author	Timothy B. Terriberry <tterribe@xiph.org>
	Mon, 11 Oct 2010 21:37:27 +0000 (14:37 -0700)
committer	John Koleszar <jkoleszar@google.com>
	Tue, 12 Oct 2010 12:41:03 +0000 (08:41 -0400)
vp8/encoder/block.h		patch \| blob \| history
vp8/encoder/encodeframe.c		patch \| blob \| history
vp8/encoder/encodeintra.c		patch \| blob \| history
vp8/encoder/encodemb.c		patch \| blob \| history
vp8/encoder/ethreading.c		patch \| blob \| history
vp8/encoder/onyx_if.c		patch \| blob \| history
vp8/encoder/onyx_int.h		patch \| blob \| history