Merge "Enable fast forward txfm and quant for rate-distortion search"

author Jingning Han <jingning@google.com>

Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)

committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org>

Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)
author Jingning Han <jingning@google.com>
Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)
committer Gerrit Code Review <gerrit@gerrit.golo.chromium.org>
Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)
diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h

index 2c77391159746d9fe45359c7f671fc0e81e433c9..bd3b0fdc85d8c08e1f6099573b5ac50722a6291a 100644 (file)
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -41,6 +41,7 @@ struct macroblock_plane {
    int16_t *zbin;
    int16_t *round;
  
+  int64_t quant_thred[2];
    // Zbin Over Quant value
    int16_t zbin_extra;
  };
@@ -117,6 +118,8 @@ struct macroblock {
    // skip forward transform and quantization
    int skip_txfm[MAX_MB_PLANE];
  
+  int64_t bsse[MAX_MB_PLANE];
+
    // Used to store sub partition's choices.
    MV pred_mv[MAX_REF_FRAMES];
  
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c

index e153b207765335a9c1de1b05ba3df13c96445cc0..eababdbca9746370b0f34e91f5a3442edb6387b8 100644 (file)
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -23,15 +23,14 @@ void vp9_quantize_dc(const int16_t *coeff_ptr, int skip_block,
                       const int16_t *round_ptr, const int16_t quant,
                       int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                       const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  int eob = -1;
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int tmp, eob = -1;
  
    if (!skip_block) {
-    const int rc = 0;
-    const int coeff = coeff_ptr[rc];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
-
-    int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
      tmp = (tmp * quant) >> 16;
      qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr;
@@ -45,15 +44,15 @@ void vp9_quantize_dc_32x32(const int16_t *coeff_ptr, int skip_block,
                             const int16_t *round_ptr, const int16_t quant,
                             int16_t *qcoeff_ptr, int16_t *dqcoeff_ptr,
                             const int16_t dequant_ptr, uint16_t *eob_ptr) {
-  int eob = -1;
+  const int rc = 0;
+  const int coeff = coeff_ptr[rc];
+  const int coeff_sign = (coeff >> 31);
+  const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+  int tmp, eob = -1;
  
    if (!skip_block) {
-    const int rc = 0;
-    const int coeff = coeff_ptr[rc];
-    const int coeff_sign = (coeff >> 31);
-    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
  
-    int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
+    tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
      tmp = (tmp * quant) >> 15;
      qcoeff_ptr[rc]  = (tmp ^ coeff_sign) - coeff_sign;
      dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr / 2;
@@ -354,6 +353,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
    x->plane[0].quant_shift = quants->y_quant_shift[qindex];
    x->plane[0].zbin = quants->y_zbin[qindex];
    x->plane[0].round = quants->y_round[qindex];
+  x->plane[0].quant_thred[0] = cm->y_dequant[qindex][0] *
+                                  cm->y_dequant[qindex][0];
+  x->plane[0].quant_thred[1] = cm->y_dequant[qindex][1] *
+                                  cm->y_dequant[qindex][1];
    x->plane[0].zbin_extra = (int16_t)((cm->y_dequant[qindex][1] * zbin) >> 7);
    xd->plane[0].dequant = cm->y_dequant[qindex];
  
@@ -365,6 +368,10 @@ void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) {
      x->plane[i].quant_shift = quants->uv_quant_shift[qindex];
      x->plane[i].zbin = quants->uv_zbin[qindex];
      x->plane[i].round = quants->uv_round[qindex];
+    x->plane[i].quant_thred[0] = cm->y_dequant[qindex][0] *
+                                    cm->y_dequant[qindex][0];
+    x->plane[i].quant_thred[1] = cm->y_dequant[qindex][1] *
+                                    cm->y_dequant[qindex][1];
      x->plane[i].zbin_extra = (int16_t)((cm->uv_dequant[qindex][1] * zbin) >> 7);
      xd->plane[i].dequant = cm->uv_dequant[qindex];
    }
diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c

index 5caafd37022858068364333c5bb0d8b3ab5b9bf0..cc55dd78f122b10f8ce179db74ac4ecdf1cc98ec 100644 (file)
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -171,15 +171,27 @@ static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize,
    int64_t dist_sum = 0;
    const int ref = xd->mi[0]->mbmi.ref_frame[0];
    unsigned int sse;
+  const int shift = 8;
  
    for (i = 0; i < MAX_MB_PLANE; ++i) {
      struct macroblock_plane *const p = &x->plane[i];
      struct macroblockd_plane *const pd = &xd->plane[i];
      const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
  
-    (void) cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
-                              pd->dst.buf, pd->dst.stride, &sse);
+    const unsigned int var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride,
+                                                pd->dst.buf, pd->dst.stride,
+                                                &sse);
  
+    if (!x->select_tx_size) {
+      if (sse < p->quant_thred[0] >> shift)
+        x->skip_txfm[i] = 1;
+      else if (var < p->quant_thred[1] >> shift)
+        x->skip_txfm[i] = 2;
+      else
+        x->skip_txfm[i] = 0;
+    }
+
+    x->bsse[i] = sse;
      if (i == 0)
        x->pred_sse[ref] = sse;
  
@@ -357,12 +369,32 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize,
    if (args->skip)
      return;
  
-  if (!is_inter_block(mbmi))
+  if (!is_inter_block(mbmi)) {
      vp9_encode_block_intra(x, plane, block, plane_bsize, tx_size, &mbmi->skip);
-  else
-    vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+    dist_block(plane, block, tx_size, args);
+  } else {
+    if (x->skip_txfm[plane] == 0) {
+      // full forward transform and quantization
+      vp9_xform_quant(x, plane, block, plane_bsize, tx_size);
+      dist_block(plane, block, tx_size, args);
+    } else if (x->skip_txfm[plane] == 2) {
+      // compute DC coefficient
+      int16_t *const coeff   = BLOCK_OFFSET(x->plane[plane].coeff, block);
+      int16_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
+      vp9_xform_quant_dc(x, plane, block, plane_bsize, tx_size);
+      args->sse  = x->bsse[plane] << 4;
+      args->dist = args->sse;
+      if (!x->plane[plane].eobs[block])
+        args->dist = args->sse - ((coeff[0] * coeff[0] -
+            (coeff[0] - dqcoeff[0]) * (coeff[0] - dqcoeff[0])) >> 2);
+    } else {
+      // skip forward transform
+      x->plane[plane].eobs[block] = 0;
+      args->sse  = x->bsse[plane] << 4;
+      args->dist = args->sse;
+    }
+  }
  
-  dist_block(plane, block, tx_size, args);
    rate_block(plane, block, plane_bsize, tx_size, args);
    rd1 = RDCOST(x->rdmult, x->rddiv, args->rate, args->dist);
    rd2 = RDCOST(x->rdmult, x->rddiv, 0, args->sse);
@@ -2102,6 +2134,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
    int orig_dst_stride[MAX_MB_PLANE];
    int rs = 0;
    INTERP_FILTER best_filter = SWITCHABLE;
+  int skip_txfm[MAX_MB_PLANE] = {0};
+  int64_t bsse[MAX_MB_PLANE] = {0};
  
    int bsl = mi_width_log2_lookup[bsize];
    int pred_filter_search = cpi->sf.cb_pred_filter_search ?
@@ -2264,6 +2298,8 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
            best_filter = mbmi->interp_filter;
            if (cm->interp_filter == SWITCHABLE && i && !intpel_mv)
              best_needs_copy = !best_needs_copy;
+          vpx_memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm));
+          vpx_memcpy(bsse, x->bsse, sizeof(bsse));
          }
  
          if ((cm->interp_filter == SWITCHABLE && newbest) ||
@@ -2316,6 +2352,9 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
                                disable_skip);
    }
  
+  vpx_memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
+  vpx_memcpy(x->bsse, bsse, sizeof(bsse));
+
    if (!x->skip) {
      int skippable_y, skippable_uv;
      int64_t sseuv = INT64_MAX;
author	Jingning Han <jingning@google.com>
	Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)
committer	Gerrit Code Review <gerrit@gerrit.golo.chromium.org>
	Tue, 12 Aug 2014 00:56:48 +0000 (17:56 -0700)
vp9/encoder/vp9_block.h		patch | blob | history
vp9/encoder/vp9_quantize.c		patch | blob | history
vp9/encoder/vp9_rdopt.c		patch | blob | history