Make tokenization process support recursive transform block coding

author Jingning Han <jingning@google.com>

Wed, 7 Oct 2015 00:02:34 +0000 (17:02 -0700)

committer Jingning Han <jingning@google.com>

Thu, 8 Oct 2015 15:46:02 +0000 (08:46 -0700)
author Jingning Han <jingning@google.com>
Wed, 7 Oct 2015 00:02:34 +0000 (17:02 -0700)
committer Jingning Han <jingning@google.com>
Thu, 8 Oct 2015 15:46:02 +0000 (08:46 -0700)
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c

index fa431e0490cf511dafc302eb61df7c5e0cf4f679..0e15ad624b4aa4ae58fc629a21b6279bd1528f53 100644 (file)
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -383,6 +383,61 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
    }
  }
  
+#if CONFIG_VAR_TX
+static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r,
+                                  MB_MODE_INFO *const mbmi,
+                                  int plane, BLOCK_SIZE plane_bsize,
+                                  int block, int blk_row, int blk_col,
+                                  TX_SIZE tx_size, int *eob_total) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+    TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+    const scan_order *sc = get_scan(tx_size, tx_type, 1);
+    const int eob = vp10_decode_block_tokens(xd, plane, sc,
+                                             blk_col, blk_row, tx_size,
+                                             r, mbmi->segment_id);
+    inverse_transform_block_inter(xd, plane, tx_size,
+        &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
+        pd->dst.stride, eob, block);
+    *eob_total += eob;
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step,
+                            offsetr, offsetc, tx_size - 1, eob_total);
+    }
+  }
+}
+#endif
+
  static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
                                     MB_MODE_INFO *const mbmi, int plane,
                                     int row, int col, TX_SIZE tx_size) {
@@ -872,13 +927,31 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
  
        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
          const struct macroblockd_plane *const pd = &xd->plane[plane];
+        const int num_4x4_w = pd->n4_w;
+        const int num_4x4_h = pd->n4_h;
+        int row, col;
+#if CONFIG_VAR_TX
+        // TODO(jingning): This can be simplified for decoder performance.
+        const BLOCK_SIZE plane_bsize =
+            get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+        const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+        const int txb_size = txsize_to_bsize[max_tx_size];
+        int bw = num_4x4_blocks_wide_lookup[txb_size];
+        int block = 0;
+        const int step = 1 << (max_tx_size << 1);
+
+        for (row = 0; row < num_4x4_h; row += bw) {
+          for (col = 0; col < num_4x4_w; col += bw) {
+            decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize,
+                                  block, row, col, max_tx_size, &eobtotal);
+            block += step;
+          }
+        }
+#else
          const TX_SIZE tx_size =
              plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
                      : mbmi->tx_size;
-        const int num_4x4_w = pd->n4_w;
-        const int num_4x4_h = pd->n4_h;
          const int step = (1 << tx_size);
-        int row, col;
          const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
              0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
          const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
@@ -888,6 +961,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
            for (col = 0; col < max_blocks_wide; col += step)
              eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
                                                  tx_size);
+#endif
        }
  
        if (!less8x8 && eobtotal == 0)
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c

index 0f823e9e15441078d2e0b37cd8b9cac5e30221fb..dbe1ec68222f5b85c540371a24357c01232b36b9 100644 (file)
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2962,7 +2962,12 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
                                       VPXMAX(bsize, BLOCK_8X8));
  
      vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_VAR_TX
+    vp10_tokenize_sb_inter(cpi, td, t, !output_enabled,
+                           mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8));
+#else
      vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+#endif
    }
  
    if (output_enabled) {
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c

index 724e13c253d0f42f52f6b78762f225400711eab0..437870ea5f1dd596221d2e85c581777404be6ab6 100644 (file)
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -1376,11 +1376,13 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
  
    // TODO(jingning): per transformed block zero forcing only enabled for
    // luma component. will integrate chroma components as well.
-  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
-    p->eobs[block] = 0;
-    *a = *l = 0;
-    return;
-  }
+  // Turn this back on when the rate-distortion loop is synchronized with
+  // the recursive transform block coding.
+//  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+//    p->eobs[block] = 0;
+//    *a = *l = 0;
+//    return;
+//  }
  
    if (!x->skip_recode) {
      if (x->quant_fp) {
@@ -1488,6 +1490,57 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
    }
  }
  
+#if CONFIG_VAR_TX
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                               void *arg) {
+  struct encode_b_args *const args = arg;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    encode_block(plane, block, blk_row, blk_col, plane_bsize,
+                 tx_size, arg);
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      encode_block_inter(plane, block + i * step, offsetr, offsetc,
+                         plane_bsize, tx_size - 1, arg);
+    }
+  }
+}
+#endif
+
  static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
                                 BLOCK_SIZE plane_bsize,
                                 TX_SIZE tx_size, void *arg) {
@@ -1541,6 +1594,19 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
      return;
  
    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+    // TODO(jingning): Clean this up.
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    int txb_size = txsize_to_bsize[max_tx_size];
+    int bh = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_tx_size * 2);
+#endif
      if (!x->skip_recode)
        vp10_subtract_plane(x, bsize, plane);
  
@@ -1548,11 +1614,21 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
        const struct macroblockd_plane* const pd = &xd->plane[plane];
        const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
        vp10_get_entropy_contexts(bsize, tx_size, pd,
-                               ctx.ta[plane], ctx.tl[plane]);
+                                ctx.ta[plane], ctx.tl[plane]);
      }
  
+#if CONFIG_VAR_TX
+    for (idy = 0; idy < mi_height; idy += bh) {
+      for (idx = 0; idx < mi_width; idx += bh) {
+        encode_block_inter(plane, block, idy, idx, plane_bsize,
+                           max_tx_size, &arg);
+        block += step;
+      }
+    }
+#else
      vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
-                                           &arg);
+                                            &arg);
+#endif
    }
  }
  
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c

index c6a16a03e25a7534343301f2db6358aa3d913407..f65e17fc25438e918403204bb5383744e0690d4a 100644 (file)
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -613,6 +613,112 @@ int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
    return result;
  }
  
+#if CONFIG_VAR_TX
+void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
+                 int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+                 int blk_row, int blk_col, int block, int plane,
+                 void *arg) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+    if (!dry_run)
+      tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+    else
+      set_entropy_context_b(plane, block, blk_row, blk_col,
+                            plane_bsize, tx_size, arg);
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
+                  offsetr, offsetc, block + i * step, plane, arg);
+    }
+  }
+}
+
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                            int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  TOKENEXTRA *t_backup = *t;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+                                          SEG_LVL_SKIP);
+  struct tokenize_b_args arg = {cpi, td, t};
+  int plane;
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  if (mbmi->skip) {
+    if (!dry_run)
+      td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    if (dry_run)
+      *t = t_backup;
+    return;
+  }
+
+  if (!dry_run)
+    td->counts->skip[ctx][0] += skip_inc;
+  else
+    *t = t_backup;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    int txb_size = txsize_to_bsize[max_tx_size];
+    int bh = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_tx_size * 2);
+
+    for (idy = 0; idy < mi_height; idy += bh) {
+      for (idx = 0; idx < mi_width; idx += bh) {
+        tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+                    block, plane, &arg);
+        block += step;
+      }
+    }
+  }
+}
+#endif
+
  void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                       int dry_run, BLOCK_SIZE bsize) {
    VP10_COMMON *const cm = &cpi->common;
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h

index 5bad415a9a6dcd544e6ee4c64848c3d35b247c36..00d399c440350f39ca60563f426ede6a8dcfd621 100644 (file)
--- a/vp10/encoder/tokenize.h
+++ b/vp10/encoder/tokenize.h
@@ -51,6 +51,12 @@ int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
  struct VP10_COMP;
  struct ThreadData;
  
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+                            TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize);
+#endif
+
  void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
                       TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
author	Jingning Han <jingning@google.com>
	Wed, 7 Oct 2015 00:02:34 +0000 (17:02 -0700)
committer	Jingning Han <jingning@google.com>
	Thu, 8 Oct 2015 15:46:02 +0000 (08:46 -0700)
vp10/decoder/decodeframe.c		patch \| blob \| history
vp10/encoder/encodeframe.c		patch \| blob \| history
vp10/encoder/encodemb.c		patch \| blob \| history
vp10/encoder/tokenize.c		patch \| blob \| history
vp10/encoder/tokenize.h		patch \| blob \| history