From: hui su <huisu@google.com>
Date: Mon, 7 Mar 2016 23:25:50 +0000 (-0800)
Subject: Refactor entropy coding of transform size
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=954e560f9e4528697c110016adf6341ccbdb0b7f;p=libvpx

Refactor entropy coding of transform size

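Replace the per-size tx_probs/tx_counts structs with the arrays
tx_size_probs, tx_size and tx_size_totals, indexed by
(max tx_size - TX_8X8). The transform size is now read and written
through vp10_tx_size_tree instead of hand-written binary decisions,
and its rate is looked up in the new cpi->tx_size_cost table.

No performance change.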

Change-Id: If35125fed909d89235b303514f77a33183bb36b3
---

diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c
index e4c27a777..d799b1af5 100644
--- a/vp10/common/entropymode.c
+++ b/vp10/common/entropymode.c
@@ -320,17 +320,6 @@ static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = {
 #endif  // CONFIG_EXT_REFS
 };
 
-static const struct tx_probs default_tx_probs = {
-  { { 3, 136, 37 },
-    { 5, 52,  13 } },
-
-  { { 20, 152 },
-    { 15, 101 } },
-
-  { { 100 },
-    { 66  } }
-};
-
 const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = {
     -TWO_COLORS, 2,
     -THREE_COLORS, 4,
@@ -694,6 +683,34 @@ static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = {
     9680, 10648, 10890, 13310
 };
 
+const vpx_tree_index vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = {
+    {  // Max tx_size is 8X8
+        -TX_4X4, -TX_8X8,
+    },
+    {  // Max tx_size is 16X16
+        -TX_4X4, 2,
+        -TX_8X8, -TX_16X16,
+    },
+    {  // Max tx_size is 32X32
+        -TX_4X4, 2,
+        -TX_8X8, 4,
+        -TX_16X16, -TX_32X32,
+    },
+};
+
+static const vpx_prob
+default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = {
+    {  // Max tx_size is 8X8
+        { 100, }, { 66, },
+    },
+    {  // Max tx_size is 16X16
+        { 20, 152, }, { 15, 101, },
+    },
+    {  // Max tx_size is 32X32
+        { 3, 136, 37 }, { 5, 52,  13 },
+    },
+};
+
 int vp10_get_palette_color_context(const uint8_t *color_map, int cols,
                                    int r, int c, int n, int *color_order) {
   int i, j, max, max_idx, temp;
@@ -767,33 +784,6 @@ int vp10_get_palette_color_context(const uint8_t *color_map, int cols,
   return color_ctx;
 }
 
-void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
-                                      unsigned int (*ct_32x32p)[2]) {
-  ct_32x32p[0][0] = tx_count_32x32p[TX_4X4];
-  ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] +
-                    tx_count_32x32p[TX_16X16] +
-                    tx_count_32x32p[TX_32X32];
-  ct_32x32p[1][0] = tx_count_32x32p[TX_8X8];
-  ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] +
-                    tx_count_32x32p[TX_32X32];
-  ct_32x32p[2][0] = tx_count_32x32p[TX_16X16];
-  ct_32x32p[2][1] = tx_count_32x32p[TX_32X32];
-}
-
-void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
-                                      unsigned int (*ct_16x16p)[2]) {
-  ct_16x16p[0][0] = tx_count_16x16p[TX_4X4];
-  ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16];
-  ct_16x16p[1][0] = tx_count_16x16p[TX_8X8];
-  ct_16x16p[1][1] = tx_count_16x16p[TX_16X16];
-}
-
-void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
-                                    unsigned int (*ct_8x8p)[2]) {
-  ct_8x8p[0][0] = tx_count_8x8p[TX_4X4];
-  ct_8x8p[0][1] = tx_count_8x8p[TX_8X8];
-}
-
 #if CONFIG_VAR_TX
 static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = {
     192, 128, 64, 192, 128, 64, 192, 128, 64,
@@ -1315,7 +1305,7 @@ static void init_mode_probs(FRAME_CONTEXT *fc) {
   vp10_copy(fc->comp_inter_prob, default_comp_inter_p);
   vp10_copy(fc->comp_ref_prob, default_comp_ref_p);
   vp10_copy(fc->single_ref_prob, default_single_ref_p);
-  fc->tx_probs = default_tx_probs;
+  vp10_copy(fc->tx_size_probs, default_tx_size_prob);
 #if CONFIG_VAR_TX
   vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs);
 #endif
@@ -1467,32 +1457,18 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) {
 }
 
 void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
-  int i;
+  int i, j;
   FRAME_CONTEXT *fc = cm->fc;
   const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx];
   const FRAME_COUNTS *counts = &cm->counts;
 
   if (cm->tx_mode == TX_MODE_SELECT) {
-    int j;
-    unsigned int branch_ct_8x8p[TX_SIZES - 3][2];
-    unsigned int branch_ct_16x16p[TX_SIZES - 2][2];
-    unsigned int branch_ct_32x32p[TX_SIZES - 1][2];
-
-    for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
-      vp10_tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p);
-      for (j = 0; j < TX_SIZES - 3; ++j)
-        fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs(
-            pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]);
-
-      vp10_tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p);
-      for (j = 0; j < TX_SIZES - 2; ++j)
-        fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs(
-            pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]);
-
-      vp10_tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p);
-      for (j = 0; j < TX_SIZES - 1; ++j)
-        fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs(
-            pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]);
+    for (i = 0; i < TX_SIZES - 1; ++i) {
+      for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+        vpx_tree_merge_probs(vp10_tx_size_tree[i],
+                             pre_fc->tx_size_probs[i][j],
+                             counts->tx_size[i][j],
+                             fc->tx_size_probs[i][j]);
     }
   }
 
@@ -1532,7 +1508,6 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) {
   }
 #else
   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
-    int j;
     for (j = 0; j < TX_TYPES; ++j)
       vpx_tree_merge_probs(vp10_ext_tx_tree,
                            pre_fc->intra_ext_tx_prob[i][j],
diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h
index d9858b3d3..0c9c33246 100644
--- a/vp10/common/entropymode.h
+++ b/vp10/common/entropymode.h
@@ -37,19 +37,6 @@ extern "C" {
 
 struct VP10Common;
 
-struct tx_probs {
-  vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1];
-  vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-  vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3];
-};
-
-struct tx_counts {
-  unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES];
-  unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1];
-  unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2];
-  unsigned int tx_totals[TX_SIZES];
-};
-
 struct seg_counts {
   unsigned int tree_total[MAX_SEGMENTS];
   unsigned int tree_mispred[MAX_SEGMENTS];
@@ -89,7 +76,7 @@ typedef struct frame_contexts {
   vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS];
   vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1];
   vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1];
-  struct tx_probs tx_probs;
+  vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1];
 #if CONFIG_VAR_TX
   vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS];
 #endif
@@ -151,7 +138,8 @@ typedef struct FRAME_COUNTS {
   unsigned int comp_inter[COMP_INTER_CONTEXTS][2];
   unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2];
   unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2];
-  struct tx_counts tx;
+  unsigned int tx_size_totals[TX_SIZES];
+  unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
 #if CONFIG_VAR_TX
   unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2];
 #endif
@@ -205,6 +193,8 @@ extern const vpx_tree_index vp10_switchable_interp_tree
 extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)];
 extern const vpx_tree_index
 vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)];
+extern const vpx_tree_index
+vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)];
 #if CONFIG_EXT_INTRA
 extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)];
 #endif  // CONFIG_EXT_INTRA
@@ -223,13 +213,6 @@ void vp10_setup_past_independence(struct VP10Common *cm);
 void vp10_adapt_intra_frame_probs(struct VP10Common *cm);
 void vp10_adapt_inter_frame_probs(struct VP10Common *cm);
 
-void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p,
-                                      unsigned int (*ct_32x32p)[2]);
-void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p,
-                                      unsigned int (*ct_16x16p)[2]);
-void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
-                                    unsigned int (*ct_8x8p)[2]);
-
 static INLINE int vp10_ceil_log2(int n) {
   int i = 1, p = 2;
   while (p < n) {
diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h
index 7d2f28a8e..83a3597a3 100644
--- a/vp10/common/pred_common.h
+++ b/vp10/common/pred_common.h
@@ -185,48 +185,11 @@ static INLINE int get_tx_size_context(const MACROBLOCKD *xd) {
   return (above_ctx + left_ctx) > max_tx_size;
 }
 
-static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
-                                           const struct tx_probs *tx_probs) {
-  switch (max_tx_size) {
-    case TX_8X8:
-      return tx_probs->p8x8[ctx];
-    case TX_16X16:
-      return tx_probs->p16x16[ctx];
-    case TX_32X32:
-      return tx_probs->p32x32[ctx];
-    default:
-      assert(0 && "Invalid max_tx_size.");
-      return NULL;
-  }
-}
-
-static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size,
-                                            const MACROBLOCKD *xd,
-                                            const struct tx_probs *tx_probs) {
-  return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs);
-}
-
-static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
-                                          struct tx_counts *tx_counts) {
-  switch (max_tx_size) {
-    case TX_8X8:
-      return tx_counts->p8x8[ctx];
-    case TX_16X16:
-      return tx_counts->p16x16[ctx];
-    case TX_32X32:
-      return tx_counts->p32x32[ctx];
-    default:
-      assert(0 && "Invalid max_tx_size.");
-      return NULL;
-  }
-}
-
 #if CONFIG_VAR_TX
 static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
                              MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize,
                              TX_SIZE tx_size, int blk_row, int blk_col,
-                             TX_SIZE max_tx_size, int ctx,
-                             struct tx_counts *tx_counts) {
+                             TX_SIZE max_tx_size, int ctx) {
   const struct macroblockd_plane *const pd = &xd->plane[0];
   const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
   int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 +
@@ -244,7 +207,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
     return;
 
   if (tx_size == plane_tx_size) {
-    ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size];
+    ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][tx_size];
     mbmi->tx_size = tx_size;
   } else {
     int bsl = b_width_log2_lookup[bsize];
@@ -260,8 +223,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd,
       if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
         continue;
       update_tx_counts(cm, xd, mbmi, plane_bsize,
-                       tx_size - 1, offsetr, offsetc,
-                       max_tx_size, ctx, tx_counts);
+                       tx_size - 1, offsetr, offsetc, max_tx_size, ctx);
     }
   }
 }
@@ -270,8 +232,7 @@ static INLINE void inter_block_tx_count_update(VP10_COMMON *cm,
                                                MACROBLOCKD *xd,
                                                MB_MODE_INFO *mbmi,
                                                BLOCK_SIZE plane_bsize,
-                                               int ctx,
-                                               struct tx_counts *tx_counts) {
+                                               int ctx) {
   const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
   const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
   TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
@@ -282,7 +243,7 @@ static INLINE void inter_block_tx_count_update(VP10_COMMON *cm,
   for (idy = 0; idy < mi_height; idy += bh)
     for (idx = 0; idx < mi_width; idx += bh)
       update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx,
-                       max_tx_size, ctx, tx_counts);
+                       max_tx_size, ctx);
 }
 #endif
 
diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c
index ecc971a7c..c916babdc 100644
--- a/vp10/common/thread_common.c
+++ b/vp10/common/thread_common.c
@@ -432,19 +432,13 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts,
       for (k = 0; k < 2; k++)
         cm->counts.comp_ref[i][j][k] += counts->comp_ref[i][j][k];
 
-  for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-    for (j = 0; j < TX_SIZES; j++)
-      cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j];
-
-    for (j = 0; j < TX_SIZES - 1; j++)
-      cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j];
+  for (i = 0; i < TX_SIZES - 1; ++i)
+    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+      for (k = 0; k < i + 2; ++k)
+        cm->counts.tx_size[i][j][k] += counts->tx_size[i][j][k];
 
-    for (j = 0; j < TX_SIZES - 2; j++)
-      cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j];
-  }
-
-  for (i = 0; i < TX_SIZES; i++)
-    cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i];
+  for (i = 0; i < TX_SIZES; ++i)
+    cm->counts.tx_size_totals[i] += counts->tx_size_totals[i];
 
 #if CONFIG_VAR_TX
   for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i)
diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index 64ac3ccf3..2886c854c 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -100,22 +100,6 @@ static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) {
   return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2);
 }
 
-static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
-  int i, j;
-
-  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-    for (j = 0; j < TX_SIZES - 3; ++j)
-      vp10_diff_update_prob(r, &tx_probs->p8x8[i][j]);
-
-  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-    for (j = 0; j < TX_SIZES - 2; ++j)
-      vp10_diff_update_prob(r, &tx_probs->p16x16[i][j]);
-
-  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
-    for (j = 0; j < TX_SIZES - 1; ++j)
-      vp10_diff_update_prob(r, &tx_probs->p32x32[i][j]);
-}
-
 static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
   int i, j;
   for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
@@ -3541,8 +3525,13 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data,
     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                        "Failed to allocate bool decoder 0");
 
-  if (cm->tx_mode == TX_MODE_SELECT)
-    read_tx_mode_probs(&fc->tx_probs, &r);
+  if (cm->tx_mode == TX_MODE_SELECT) {
+    for (i = 0; i < TX_SIZES - 1; ++i)
+      for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+        for (k = 0; k < i + 1; ++k)
+          vp10_diff_update_prob(&r, &fc->tx_size_probs[i][j][k]);
+  }
+
   read_coef_probs(fc, cm->tx_mode, &r);
 
 #if CONFIG_VAR_TX
@@ -3679,7 +3668,8 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) {
                  sizeof(cm->counts.single_ref)));
   assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref,
                  sizeof(cm->counts.comp_ref)));
-  assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx)));
+  assert(!memcmp(&cm->counts.tx_size, &zero_counts.tx_size,
+                 sizeof(cm->counts.tx_size)));
   assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip)));
 #if CONFIG_REF_MV
   assert(!memcmp(&cm->counts.mv[0], &zero_counts.mv[0],
diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c
index fccd3c880..eefef748f 100644
--- a/vp10/decoder/decodemv.c
+++ b/vp10/decoder/decodemv.c
@@ -273,16 +273,11 @@ static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd,
                                      TX_SIZE max_tx_size, vpx_reader *r) {
   FRAME_COUNTS *counts = xd->counts;
   const int ctx = get_tx_size_context(xd);
-  const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs);
-  int tx_size = vpx_read(r, tx_probs[0]);
-  if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
-    tx_size += vpx_read(r, tx_probs[1]);
-    if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
-      tx_size += vpx_read(r, tx_probs[2]);
-  }
-
+  const int tx_size_cat = max_tx_size - TX_8X8;
+  int tx_size = vpx_read_tree(r, vp10_tx_size_tree[tx_size_cat],
+                              cm->fc->tx_size_probs[tx_size_cat][ctx]);
   if (counts)
-    ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size];
+    ++counts->tx_size[tx_size_cat][ctx][tx_size];
   return (TX_SIZE)tx_size;
 }
 
@@ -1508,7 +1503,7 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi,
                              idy, idx, r);
       if (xd->counts) {
         const int ctx = get_tx_size_context(xd);
-        ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size];
+        ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][mbmi->tx_size];
       }
     } else {
       mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r);
diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c
index 2603b6b48..7d1e8820e 100644
--- a/vp10/encoder/bitstream.c
+++ b/vp10/encoder/bitstream.c
@@ -81,6 +81,13 @@ palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = {
         {30, 5}, {62, 6}, {126, 7}, {127, 7}},  // 8 colors
 };
 
+static const struct vp10_token
+tx_size_encodings[TX_SIZES - 1][TX_SIZES] = {
+    {{0, 1}, {1, 1}},  // Max tx_size is 8X8
+    {{0, 1}, {2, 2}, {3, 2}},  // Max tx_size is 16X16
+    {{0, 1}, {2, 2}, {6, 3}, {7, 3}},  // Max tx_size is 32X32
+};
+
 static INLINE void write_uniform(vpx_writer *w, int n, int v) {
   int l = get_unsigned_bits(n);
   int m = (1 << l) - n;
@@ -314,13 +321,11 @@ static void write_selected_tx_size(const VP10_COMMON *cm,
   TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size;
   BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-  const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd,
-                                                 &cm->fc->tx_probs);
-  vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
-  if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
-    vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
-    if (tx_size != TX_8X8 && max_tx_size >= TX_32X32)
-      vpx_write(w, tx_size != TX_16X16, tx_probs[2]);
+  if (max_tx_size > TX_4X4) {
+    vp10_write_token(w, vp10_tx_size_tree[max_tx_size - TX_8X8],
+                     cm->fc->tx_size_probs[max_tx_size - TX_8X8]
+                                          [get_tx_size_context(xd)],
+                     &tx_size_encodings[max_tx_size - TX_8X8][tx_size]);
   }
 }
 
@@ -1847,7 +1852,7 @@ static void update_coef_probs(VP10_COMP *cpi, vpx_writer* w) {
   for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) {
     vp10_coeff_stats frame_branch_ct[PLANE_TYPES];
     vp10_coeff_probs_model frame_coef_probs[PLANE_TYPES];
-    if (cpi->td.counts->tx.tx_totals[tx_size] <= 20 ||
+    if (cpi->td.counts->tx_size_totals[tx_size] <= 20 ||
         (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) {
       vpx_write_bit(w, 0);
     } else {
@@ -2028,30 +2033,11 @@ static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w,
                               FRAME_COUNTS *counts) {
   if (cm->tx_mode == TX_MODE_SELECT) {
     int i, j;
-    unsigned int ct_8x8p[TX_SIZES - 3][2];
-    unsigned int ct_16x16p[TX_SIZES - 2][2];
-    unsigned int ct_32x32p[TX_SIZES - 1][2];
-
-
-    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-      vp10_tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p);
-      for (j = 0; j < TX_SIZES - 3; j++)
-        vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]);
-    }
-
-    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-      vp10_tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p);
-      for (j = 0; j < TX_SIZES - 2; j++)
-        vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j],
-                                  ct_16x16p[j]);
-    }
-
-    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
-      vp10_tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p);
-      for (j = 0; j < TX_SIZES - 1; j++)
-        vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j],
-                                  ct_32x32p[j]);
-    }
+    for (i = 0; i < TX_SIZES - 1; ++i)
+      for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+        prob_diff_update(vp10_tx_size_tree[i],
+                         cm->fc->tx_size_probs[i][j],
+                         counts->tx_size[i][j], i + 2, w);
   }
 }
 
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 8c7af63f0..ae04e2a1d 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -4148,19 +4148,18 @@ void vp10_encode_frame(VP10_COMP *cpi) {
       int count8x8_lp = 0, count8x8_8x8p = 0;
       int count16x16_16x16p = 0, count16x16_lp = 0;
       int count32x32 = 0;
-
       for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
-        count4x4 += counts->tx.p32x32[i][TX_4X4];
-        count4x4 += counts->tx.p16x16[i][TX_4X4];
-        count4x4 += counts->tx.p8x8[i][TX_4X4];
+        count4x4 += counts->tx_size[0][i][TX_4X4];
+        count4x4 += counts->tx_size[1][i][TX_4X4];
+        count4x4 += counts->tx_size[2][i][TX_4X4];
 
-        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
-        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
-        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];
+        count8x8_lp += counts->tx_size[1][i][TX_8X8];
+        count8x8_lp += counts->tx_size[2][i][TX_8X8];
+        count8x8_8x8p += counts->tx_size[0][i][TX_8X8];
 
-        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
-        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
-        count32x32 += counts->tx.p32x32[i][TX_32X32];
+        count16x16_16x16p += counts->tx_size[1][i][TX_16X16];
+        count16x16_lp += counts->tx_size[2][i][TX_16X16];
+        count32x32 += counts->tx_size[2][i][TX_32X32];
       }
       if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
 #if CONFIG_SUPERTX
@@ -4521,8 +4520,8 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
       if (is_inter_block(mbmi))
         tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts);
 #endif
-      ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
-                      &td->counts->tx)[mbmi->tx_size];
+      ++td->counts->tx_size[max_txsize_lookup[bsize] - TX_8X8]
+                           [get_tx_size_context(xd)][mbmi->tx_size];
     } else {
       int x, y;
       TX_SIZE tx_size;
@@ -4538,8 +4537,8 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
           if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows)
             mi_8x8[mis * y + x]->mbmi.tx_size = tx_size;
     }
-    ++td->counts->tx.tx_totals[mbmi->tx_size];
-    ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
+    ++td->counts->tx_size_totals[mbmi->tx_size];
+    ++td->counts->tx_size_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
 #if CONFIG_EXT_TX
     if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 &&
         cm->base_qindex > 0 && !mbmi->skip &&
diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h
index efde0fc31..df721fd34 100644
--- a/vp10/encoder/encoder.h
+++ b/vp10/encoder/encoder.h
@@ -516,6 +516,7 @@ typedef struct VP10_COMP {
                                                  [PALETTE_COLORS];
   int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS]
                                                   [PALETTE_COLORS];
+  int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
 #if CONFIG_EXT_TX
   int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES];
   int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES]
diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c
index 3f60a1b7b..78e8e9a36 100644
--- a/vp10/encoder/rd.c
+++ b/vp10/encoder/rd.c
@@ -104,6 +104,12 @@ static void fill_mode_costs(VP10_COMP *cpi) {
                        vp10_default_palette_uv_color_prob[i][j],
                        vp10_palette_color_tree[i]);
     }
+
+  for (i = 0; i < TX_SIZES - 1; ++i)
+    for (j = 0; j < TX_SIZE_CONTEXTS; ++j)
+      vp10_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j],
+                       vp10_tx_size_tree[i]);
+
 #if CONFIG_EXT_TX
   for (i = TX_4X4; i < EXT_TX_SIZES; ++i) {
     int s;
diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c
index 1416f3a29..ba864e40d 100644
--- a/vp10/encoder/rdopt.c
+++ b/vp10/encoder/rdopt.c
@@ -1251,21 +1251,6 @@ static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x,
                    mbmi->tx_size, cpi->sf.use_fast_coef_costing);
 }
 
-static INLINE int vp10_cost_tx_size(TX_SIZE tx_size, TX_SIZE max_tx_size,
-                                    const vpx_prob *tx_probs) {
-  int m;
-  int r_tx_size = 0;
-
-  for (m = 0; m <= tx_size - (tx_size == max_tx_size); ++m) {
-    if (m == tx_size)
-      r_tx_size += vp10_cost_zero(tx_probs[m]);
-    else
-      r_tx_size += vp10_cost_one(tx_probs[m]);
-  }
-
-  return r_tx_size;
-}
-
 static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
                                    int *rate,
                                    int64_t *distortion,
@@ -1288,7 +1273,6 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
   int start_tx, end_tx;
   const int tx_select = cm->tx_mode == TX_MODE_SELECT;
   const int is_inter = is_inter_block(mbmi);
-  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
   TX_TYPE tx_type, best_tx_type = DCT_DCT;
   int prune = 0;
 #if CONFIG_EXT_TX
@@ -1320,7 +1304,8 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x,
   for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) {
     last_rd = INT64_MAX;
     for (n = start_tx; n >= end_tx; --n) {
-      const int r_tx_size = vp10_cost_tx_size(n, max_tx_size, tx_probs);
+      const int r_tx_size =
+          cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][n];
       if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n))
           continue;
 #if CONFIG_EXT_TX
@@ -2393,8 +2378,6 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
   const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0);
   const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0);
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd,
-                                           &cpi->common.fc->tx_probs);
   bmode_costs = cpi->y_mode_costs[A][L];
 
 #if CONFIG_EXT_INTRA
@@ -2471,8 +2454,9 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x,
       // tokenonly rate, but for intra blocks, tx_size is always coded
       // (prediction granularity), so we account for it in the full rate,
       // not the tokenonly rate.
-      this_rate_tokenonly -= vp10_cost_tx_size(mic->mbmi.tx_size, max_tx_size,
-                                               tx_probs);
+      this_rate_tokenonly -=
+          cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
+                                                 [mic->mbmi.tx_size];
     }
     if (cpi->common.allow_screen_content_tools && mode == DC_PRED)
       this_rate +=
@@ -6719,7 +6703,6 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
   int64_t mask_filter = 0;
   int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
   const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
-  const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
 #if CONFIG_OBMC
 #if CONFIG_VP9_HIGHBITDEPTH
   DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]);
@@ -7276,7 +7259,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi,
         // tokenonly rate, but for intra blocks, tx_size is always coded
         // (prediction granularity), so we account for it in the full rate,
         // not the tokenonly rate.
-        rate_y -= vp10_cost_tx_size(mbmi->tx_size, max_tx_size, tx_probs);
+        rate_y -=
+            cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)]
+                                                   [mbmi->tx_size];
       }
 #if CONFIG_EXT_INTRA
       if (is_directional_mode) {