From: Jingning Han <jingning@google.com>
Date: Fri, 16 Oct 2015 06:11:30 +0000 (-0700)
Subject: Support per transform block skip coding
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfeac5e19cfa6a083343545e20e99f72d31b1a53;p=libvpx

Support per transform block skip coding

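Allow the encoder to skip the coding of individual transform blocks:
when coding a block as all-zero gives a rate-distortion cost no higher
than coding its coefficients (and the segment is not lossless), the
block is marked as skipped and its end-of-block position is set to 0.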

Change-Id: I2c2b2985254cb92baf891f03daa33f067279373b
---

diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h
index df3830cb5..55ec46eec 100644
--- a/vp10/encoder/block.h
+++ b/vp10/encoder/block.h
@@ -111,6 +111,9 @@ struct macroblock {
   // Notes transform blocks where no coefficents are coded.
   // Set during mode selection. Read during block encoding.
   uint8_t zcoeff_blk[TX_SIZES][256];
+#if CONFIG_VAR_TX
+  uint8_t blk_skip[MAX_MB_PLANE][256];
+#endif
 
   int skip;
 
diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c
index 6c056d28e..1ac802f81 100644
--- a/vp10/encoder/context_tree.c
+++ b/vp10/encoder/context_tree.c
@@ -28,6 +28,10 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk,
   CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
                   vpx_calloc(num_blk, sizeof(uint8_t)));
   for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+    CHECK_MEM_ERROR(cm, ctx->blk_skip[i],
+                    vpx_calloc(num_blk, sizeof(uint8_t)));
+#endif
     for (k = 0; k < 3; ++k) {
       CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
                       vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k])));
@@ -50,6 +54,10 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) {
   vpx_free(ctx->zcoeff_blk);
   ctx->zcoeff_blk = 0;
   for (i = 0; i < MAX_MB_PLANE; ++i) {
+#if CONFIG_VAR_TX
+    vpx_free(ctx->blk_skip[i]);
+    ctx->blk_skip[i] = 0;
+#endif
     for (k = 0; k < 3; ++k) {
       vpx_free(ctx->coeff[i][k]);
       ctx->coeff[i][k] = 0;
diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h
index 2a0fffbfb..55ae471c5 100644
--- a/vp10/encoder/context_tree.h
+++ b/vp10/encoder/context_tree.h
@@ -28,6 +28,9 @@ typedef struct {
   MB_MODE_INFO_EXT mbmi_ext;
   uint8_t *zcoeff_blk;
   uint8_t *color_index_map[2];
+#if CONFIG_VAR_TX
+  uint8_t *blk_skip[MAX_MB_PLANE];
+#endif
   tran_low_t *coeff[MAX_MB_PLANE][3];
   tran_low_t *qcoeff[MAX_MB_PLANE][3];
   tran_low_t *dqcoeff[MAX_MB_PLANE][3];
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 784b2580e..44a57e346 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -1031,8 +1031,15 @@ static void update_state(VP10_COMP *cpi, ThreadData *td,
   }
 
   x->skip = ctx->skip;
+
+#if CONFIG_VAR_TX
+  for (i = 0; i < MAX_MB_PLANE; ++i)
+    memcpy(x->blk_skip[i], ctx->blk_skip[i],
+           sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
   memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
          sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
 
   if (!output_enabled)
     return;
@@ -2841,6 +2848,9 @@ static void encode_frame_internal(VP10_COMP *cpi) {
 
   x->quant_fp = cpi->sf.use_quant_fp;
   vp10_zero(x->skip_txfm);
+#if CONFIG_VAR_TX
+  vp10_zero(x->blk_skip);
+#endif
 
   {
     struct vpx_usec_timer emr_timer;
@@ -3316,7 +3326,6 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
                        max_txsize_lookup[bsize]);
     else
       tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4;
-
     mbmi->tx_size = tx_size;
     set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h);
     set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w);
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 966d05c4d..5f6e3b2af 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -1393,6 +1393,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
   TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size);
 #if CONFIG_VAR_TX
   int i;
+  const int bwl = b_width_log2_lookup[plane_bsize];
 #endif
   dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col];
   a = &ctx->ta[plane][blk_col];
@@ -1408,7 +1409,12 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
 //    return;
 //  }
 
+#if CONFIG_VAR_TX
+  if (!x->skip_recode &&
+      x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) {
+#else
   if (!x->skip_recode) {
+#endif
     if (x->quant_fp) {
       // Encoding process for rtc mode
       if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) {
@@ -1435,7 +1441,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
           // skip forward transform
           p->eobs[block] = 0;
           *a = *l = 0;
+#if !CONFIG_VAR_TX
           return;
+#endif
         }
       } else {
         vp10_xform_quant(x, plane, block, blk_row, blk_col,
@@ -1443,6 +1451,12 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
       }
     }
   }
+#if CONFIG_VAR_TX
+  else {
+    if (!x->skip_recode)
+      p->eobs[block] = 0;
+  }
+#endif
 
   if (x->optimize && (!x->skip_recode || !x->skip_optimize)) {
     int ctx;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 4a4362e00..0dbbd81ab 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1733,6 +1733,7 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
   int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1);
   int all_skip = 1;
   int tmp_eob = 0;
+  int zero_blk_rate;
 
   if (ref_best_rd < 0) {
     *is_cost_valid = 0;
@@ -1775,10 +1776,27 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
   if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
     return;
 
+  zero_blk_rate =
+      x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN];
+
   if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) {
     mbmi->inter_tx_size[tx_idx] = tx_size;
     tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block,
                   plane_bsize, coeff_ctx, rate, dist, bsse, skip);
+
+    if (RDCOST(x->rdmult, x->rddiv, *rate, *dist) >=
+        RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) && (*skip == 0) &&
+        !xd->lossless[mbmi->segment_id]) {
+      *rate = zero_blk_rate;
+      *dist = *bsse;
+      *skip = 1;
+      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1;
+      p->eobs[block] = 0;
+    } else {
+      x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0;
+      *skip = 0;
+    }
+
     if (tx_size > TX_4X4)
       *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0);
     this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist);
@@ -1833,6 +1851,7 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x,
     mbmi->tx_size = tx_size;
     if (this_rd == INT64_MAX)
       *is_cost_valid = 0;
+    x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip;
   } else {
     *rate = sum_rate;
     *dist = sum_dist;
@@ -4368,8 +4387,15 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
 
         if (!x->select_tx_size)
           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+        for (i = 0; i < MAX_MB_PLANE; ++i)
+          memcpy(ctx->blk_skip[i], x->blk_skip[i],
+                 sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size],
                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
 
         // TODO(debargha): enhance this test with a better distortion prediction
         // based on qp, activity mask and history
@@ -5120,8 +5146,14 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi,
         best_skip2 = this_skip2;
         if (!x->select_tx_size)
           swap_block_ptr(x, ctx, 1, 0, 0, max_plane);
+
+#if CONFIG_VAR_TX
+        for (i = 0; i < MAX_MB_PLANE; ++i)
+          memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk);
+#else
         memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4],
                sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);
+#endif
 
         for (i = 0; i < 4; i++)
           best_bmodes[i] = xd->mi[0]->bmi[i];