From: hui su Date: Mon, 7 Mar 2016 23:25:50 +0000 (-0800) Subject: Refactor entropy coding of transform size X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=954e560f9e4528697c110016adf6341ccbdb0b7f;p=libvpx Refactor entropy coding of transform size No performance change. Change-Id: If35125fed909d89235b303514f77a33183bb36b3 --- diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index e4c27a777..d799b1af5 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -320,17 +320,6 @@ static const vpx_prob default_single_ref_p[REF_CONTEXTS][SINGLE_REFS - 1] = { #endif // CONFIG_EXT_REFS }; -static const struct tx_probs default_tx_probs = { - { { 3, 136, 37 }, - { 5, 52, 13 } }, - - { { 20, 152 }, - { 15, 101 } }, - - { { 100 }, - { 66 } } -}; - const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)] = { -TWO_COLORS, 2, -THREE_COLORS, 4, @@ -694,6 +683,34 @@ static const int palette_color_context_lookup[PALETTE_COLOR_CONTEXTS] = { 9680, 10648, 10890, 13310 }; +const vpx_tree_index vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)] = { + { // Max tx_size is 8X8 + -TX_4X4, -TX_8X8, + }, + { // Max tx_size is 16X16 + -TX_4X4, 2, + -TX_8X8, -TX_16X16, + }, + { // Max tx_size is 32X32 + -TX_4X4, 2, + -TX_8X8, 4, + -TX_16X16, -TX_32X32, + }, +}; + +static const vpx_prob +default_tx_size_prob[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1] = { + { // Max tx_size is 8X8 + { 100, }, { 66, }, + }, + { // Max tx_size is 16X16 + { 20, 152, }, { 15, 101, }, + }, + { // Max tx_size is 32X32 + { 3, 136, 37 }, { 5, 52, 13 }, + }, +}; + int vp10_get_palette_color_context(const uint8_t *color_map, int cols, int r, int c, int n, int *color_order) { int i, j, max, max_idx, temp; @@ -767,33 +784,6 @@ int vp10_get_palette_color_context(const uint8_t *color_map, int cols, return color_ctx; } -void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]) { - ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; - ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + - tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; - ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + - tx_count_32x32p[TX_32X32]; - ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; - ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; -} - -void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]) { - ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; - ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; - ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; - ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; -} - -void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]) { - ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; - ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; -} - #if CONFIG_VAR_TX static const vpx_prob default_txfm_partition_probs[TXFM_PARTITION_CONTEXTS] = { 192, 128, 64, 192, 128, 64, 192, 128, 64, @@ -1315,7 +1305,7 @@ static void init_mode_probs(FRAME_CONTEXT *fc) { vp10_copy(fc->comp_inter_prob, default_comp_inter_p); vp10_copy(fc->comp_ref_prob, default_comp_ref_p); vp10_copy(fc->single_ref_prob, default_single_ref_p); - fc->tx_probs = default_tx_probs; + vp10_copy(fc->tx_size_probs, default_tx_size_prob); #if CONFIG_VAR_TX vp10_copy(fc->txfm_partition_prob, default_txfm_partition_probs); #endif @@ -1467,32 +1457,18 @@ void vp10_adapt_inter_frame_probs(VP10_COMMON *cm) { } void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) { - int i; + int i, j; FRAME_CONTEXT *fc = cm->fc; const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; const FRAME_COUNTS *counts = &cm->counts; if (cm->tx_mode == TX_MODE_SELECT) { - int j; - unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; - unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; - unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { - vp10_tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); - for (j = 0; j < TX_SIZES - 3; ++j) - fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); - - vp10_tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); - for (j = 0; j < TX_SIZES - 2; ++j) - fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); - - vp10_tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); - for (j = 0; j < TX_SIZES - 1; ++j) - fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( - pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); + for (i = 0; i < TX_SIZES - 1; ++i) { + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) + vpx_tree_merge_probs(vp10_tx_size_tree[i], + pre_fc->tx_size_probs[i][j], + counts->tx_size[i][j], + fc->tx_size_probs[i][j]); } } @@ -1532,7 +1508,6 @@ void vp10_adapt_intra_frame_probs(VP10_COMMON *cm) { } #else for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - int j; for (j = 0; j < TX_TYPES; ++j) vpx_tree_merge_probs(vp10_ext_tx_tree, pre_fc->intra_ext_tx_prob[i][j], diff --git a/vp10/common/entropymode.h b/vp10/common/entropymode.h index d9858b3d3..0c9c33246 100644 --- a/vp10/common/entropymode.h +++ b/vp10/common/entropymode.h @@ -37,19 +37,6 @@ extern "C" { struct VP10Common; -struct tx_probs { - vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; -}; - -struct tx_counts { - unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; - unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; - unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; - unsigned int tx_totals[TX_SIZES]; -}; - struct seg_counts { unsigned int tree_total[MAX_SEGMENTS]; unsigned int tree_mispred[MAX_SEGMENTS]; @@ -89,7 +76,7 @@ typedef struct frame_contexts { vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; vpx_prob single_ref_prob[REF_CONTEXTS][SINGLE_REFS-1]; vpx_prob comp_ref_prob[REF_CONTEXTS][COMP_REFS-1]; - struct tx_probs tx_probs; + vpx_prob tx_size_probs[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES - 1]; #if CONFIG_VAR_TX vpx_prob txfm_partition_prob[TXFM_PARTITION_CONTEXTS]; #endif @@ -151,7 +138,8 @@ typedef struct FRAME_COUNTS { unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS-1][2]; unsigned int comp_ref[REF_CONTEXTS][COMP_REFS-1][2]; - struct tx_counts tx; + unsigned int tx_size_totals[TX_SIZES]; + unsigned int tx_size[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; #if CONFIG_VAR_TX unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2]; #endif @@ -205,6 +193,8 @@ extern const vpx_tree_index vp10_switchable_interp_tree extern const vpx_tree_index vp10_palette_size_tree[TREE_SIZE(PALETTE_SIZES)]; extern const vpx_tree_index vp10_palette_color_tree[PALETTE_MAX_SIZE - 1][TREE_SIZE(PALETTE_COLORS)]; +extern const vpx_tree_index +vp10_tx_size_tree[TX_SIZES - 1][TREE_SIZE(TX_SIZES)]; #if CONFIG_EXT_INTRA extern const vpx_tree_index vp10_intra_filter_tree[TREE_SIZE(INTRA_FILTERS)]; #endif // CONFIG_EXT_INTRA @@ -223,13 +213,6 @@ void vp10_setup_past_independence(struct VP10Common *cm); void vp10_adapt_intra_frame_probs(struct VP10Common *cm); void vp10_adapt_inter_frame_probs(struct VP10Common *cm); -void vp10_tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, - unsigned int (*ct_32x32p)[2]); -void vp10_tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, - unsigned int (*ct_16x16p)[2]); -void vp10_tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, - unsigned int (*ct_8x8p)[2]); - static INLINE int vp10_ceil_log2(int n) { int i = 1, p = 2; while (p < n) { diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h index 7d2f28a8e..83a3597a3 100644 --- a/vp10/common/pred_common.h +++ b/vp10/common/pred_common.h @@ -185,48 +185,11 @@ static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { return (above_ctx + left_ctx) > max_tx_size; } -static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, - const struct tx_probs *tx_probs) { - switch (max_tx_size) { - case TX_8X8: - return tx_probs->p8x8[ctx]; - case TX_16X16: - return tx_probs->p16x16[ctx]; - case TX_32X32: - return tx_probs->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - -static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, - const MACROBLOCKD *xd, - const struct tx_probs *tx_probs) { - return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); -} - -static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, - struct tx_counts *tx_counts) { - switch (max_tx_size) { - case TX_8X8: - return tx_counts->p8x8[ctx]; - case TX_16X16: - return tx_counts->p16x16[ctx]; - case TX_32X32: - return tx_counts->p32x32[ctx]; - default: - assert(0 && "Invalid max_tx_size."); - return NULL; - } -} - #if CONFIG_VAR_TX static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int blk_row, int blk_col, - TX_SIZE max_tx_size, int ctx, - struct tx_counts *tx_counts) { + TX_SIZE max_tx_size, int ctx) { const struct macroblockd_plane *const pd = &xd->plane[0]; const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + @@ -244,7 +207,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, return; if (tx_size == plane_tx_size) { - ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size]; + ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][tx_size]; mbmi->tx_size = tx_size; } else { int bsl = b_width_log2_lookup[bsize]; @@ -260,8 +223,7 @@ static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; update_tx_counts(cm, xd, mbmi, plane_bsize, - tx_size - 1, offsetr, offsetc, - max_tx_size, ctx, tx_counts); + tx_size - 1, offsetr, offsetc, max_tx_size, ctx); } } } @@ -270,8 +232,7 @@ static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize, - int ctx, - struct tx_counts *tx_counts) { + int ctx) { const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; @@ -282,7 +243,7 @@ static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, for (idy = 0; idy < mi_height; idy += bh) for (idx = 0; idx < mi_width; idx += bh) update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx, - max_tx_size, ctx, tx_counts); + max_tx_size, ctx); } #endif diff --git a/vp10/common/thread_common.c b/vp10/common/thread_common.c index ecc971a7c..c916babdc 100644 --- a/vp10/common/thread_common.c +++ b/vp10/common/thread_common.c @@ -432,19 +432,13 @@ void vp10_accumulate_frame_counts(VP10_COMMON *cm, FRAME_COUNTS *counts, for (k = 0; k < 2; k++) cm->counts.comp_ref[i][j][k] += counts->comp_ref[i][j][k]; - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - for (j = 0; j < TX_SIZES; j++) - cm->counts.tx.p32x32[i][j] += counts->tx.p32x32[i][j]; - - for (j = 0; j < TX_SIZES - 1; j++) - cm->counts.tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + for (i = 0; i < TX_SIZES - 1; ++i) + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) + for (k = 0; k < i + 2; ++k) + cm->counts.tx_size[i][j][k] += counts->tx_size[i][j][k]; - for (j = 0; j < TX_SIZES - 2; j++) - cm->counts.tx.p8x8[i][j] += counts->tx.p8x8[i][j]; - } - - for (i = 0; i < TX_SIZES; i++) - cm->counts.tx.tx_totals[i] += counts->tx.tx_totals[i]; + for (i = 0; i < TX_SIZES; ++i) + cm->counts.tx_size_totals[i] += counts->tx_size_totals[i]; #if CONFIG_VAR_TX for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 64ac3ccf3..2886c854c 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -100,22 +100,6 @@ static TX_MODE read_tx_mode(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? TX_MODE_SELECT : vpx_rb_read_literal(rb, 2); } -static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { - int i, j; - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 3; ++j) - vp10_diff_update_prob(r, &tx_probs->p8x8[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 2; ++j) - vp10_diff_update_prob(r, &tx_probs->p16x16[i][j]); - - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) - for (j = 0; j < TX_SIZES - 1; ++j) - vp10_diff_update_prob(r, &tx_probs->p32x32[i][j]); -} - static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { int i, j; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) @@ -3541,8 +3525,13 @@ static int read_compressed_header(VP10Decoder *pbi, const uint8_t *data, vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); - if (cm->tx_mode == TX_MODE_SELECT) - read_tx_mode_probs(&fc->tx_probs, &r); + if (cm->tx_mode == TX_MODE_SELECT) { + for (i = 0; i < TX_SIZES - 1; ++i) + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) + for (k = 0; k < i + 1; ++k) + vp10_diff_update_prob(&r, &fc->tx_size_probs[i][j][k]); + } + read_coef_probs(fc, cm->tx_mode, &r); #if CONFIG_VAR_TX @@ -3679,7 +3668,8 @@ static void debug_check_frame_counts(const VP10_COMMON *const cm) { sizeof(cm->counts.single_ref))); assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref, sizeof(cm->counts.comp_ref))); - assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); + assert(!memcmp(&cm->counts.tx_size, &zero_counts.tx_size, + sizeof(cm->counts.tx_size))); assert(!memcmp(cm->counts.skip, zero_counts.skip, sizeof(cm->counts.skip))); #if CONFIG_REF_MV assert(!memcmp(&cm->counts.mv[0], &zero_counts.mv[0], diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index fccd3c880..eefef748f 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -273,16 +273,11 @@ static TX_SIZE read_selected_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vpx_reader *r) { FRAME_COUNTS *counts = xd->counts; const int ctx = get_tx_size_context(xd); - const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); - int tx_size = vpx_read(r, tx_probs[0]); - if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { - tx_size += vpx_read(r, tx_probs[1]); - if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) - tx_size += vpx_read(r, tx_probs[2]); - } - + const int tx_size_cat = max_tx_size - TX_8X8; + int tx_size = vpx_read_tree(r, vp10_tx_size_tree[tx_size_cat], + cm->fc->tx_size_probs[tx_size_cat][ctx]); if (counts) - ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; + ++counts->tx_size[tx_size_cat][ctx][tx_size]; return (TX_SIZE)tx_size; } @@ -1508,7 +1503,7 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi, idy, idx, r); if (xd->counts) { const int ctx = get_tx_size_context(xd); - ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size]; + ++xd->counts->tx_size[max_tx_size - TX_8X8][ctx][mbmi->tx_size]; } } else { mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); diff --git a/vp10/encoder/bitstream.c b/vp10/encoder/bitstream.c index 2603b6b48..7d1e8820e 100644 --- a/vp10/encoder/bitstream.c +++ b/vp10/encoder/bitstream.c @@ -81,6 +81,13 @@ palette_color_encodings[PALETTE_MAX_SIZE - 1][PALETTE_MAX_SIZE] = { {30, 5}, {62, 6}, {126, 7}, {127, 7}}, // 8 colors }; +static const struct vp10_token +tx_size_encodings[TX_SIZES - 1][TX_SIZES] = { + {{0, 1}, {1, 1}}, // Max tx_size is 8X8 + {{0, 1}, {2, 2}, {3, 2}}, // Max tx_size is 16X16 + {{0, 1}, {2, 2}, {6, 3}, {7, 3}}, // Max tx_size is 32X32 +}; + static INLINE void write_uniform(vpx_writer *w, int n, int v) { int l = get_unsigned_bits(n); int m = (1 << l) - n; @@ -314,13 +321,11 @@ static void write_selected_tx_size(const VP10_COMMON *cm, TX_SIZE tx_size = xd->mi[0]->mbmi.tx_size; BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - const vpx_prob *const tx_probs = get_tx_probs2(max_tx_size, xd, - &cm->fc->tx_probs); - vpx_write(w, tx_size != TX_4X4, tx_probs[0]); - if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { - vpx_write(w, tx_size != TX_8X8, tx_probs[1]); - if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) - vpx_write(w, tx_size != TX_16X16, tx_probs[2]); + if (max_tx_size > TX_4X4) { + vp10_write_token(w, vp10_tx_size_tree[max_tx_size - TX_8X8], + cm->fc->tx_size_probs[max_tx_size - TX_8X8] + [get_tx_size_context(xd)], + &tx_size_encodings[max_tx_size - TX_8X8][tx_size]); } } @@ -1847,7 +1852,7 @@ static void update_coef_probs(VP10_COMP *cpi, vpx_writer* w) { for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) { vp10_coeff_stats frame_branch_ct[PLANE_TYPES]; vp10_coeff_probs_model frame_coef_probs[PLANE_TYPES]; - if (cpi->td.counts->tx.tx_totals[tx_size] <= 20 || + if (cpi->td.counts->tx_size_totals[tx_size] <= 20 || (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) { vpx_write_bit(w, 0); } else { @@ -2028,30 +2033,11 @@ static void update_txfm_probs(VP10_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { if (cm->tx_mode == TX_MODE_SELECT) { int i, j; - unsigned int ct_8x8p[TX_SIZES - 3][2]; - unsigned int ct_16x16p[TX_SIZES - 2][2]; - unsigned int ct_32x32p[TX_SIZES - 1][2]; - - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - vp10_tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p); - for (j = 0; j < TX_SIZES - 3; j++) - vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]); - } - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - vp10_tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p); - for (j = 0; j < TX_SIZES - 2; j++) - vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j], - ct_16x16p[j]); - } - - for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - vp10_tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p); - for (j = 0; j < TX_SIZES - 1; j++) - vp10_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j], - ct_32x32p[j]); - } + for (i = 0; i < TX_SIZES - 1; ++i) + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) + prob_diff_update(vp10_tx_size_tree[i], + cm->fc->tx_size_probs[i][j], + counts->tx_size[i][j], i + 2, w); } } diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 8c7af63f0..ae04e2a1d 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -4148,19 +4148,18 @@ void vp10_encode_frame(VP10_COMP *cpi) { int count8x8_lp = 0, count8x8_8x8p = 0; int count16x16_16x16p = 0, count16x16_lp = 0; int count32x32 = 0; - for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { - count4x4 += counts->tx.p32x32[i][TX_4X4]; - count4x4 += counts->tx.p16x16[i][TX_4X4]; - count4x4 += counts->tx.p8x8[i][TX_4X4]; + count4x4 += counts->tx_size[0][i][TX_4X4]; + count4x4 += counts->tx_size[1][i][TX_4X4]; + count4x4 += counts->tx_size[2][i][TX_4X4]; - count8x8_lp += counts->tx.p32x32[i][TX_8X8]; - count8x8_lp += counts->tx.p16x16[i][TX_8X8]; - count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; + count8x8_lp += counts->tx_size[1][i][TX_8X8]; + count8x8_lp += counts->tx_size[2][i][TX_8X8]; + count8x8_8x8p += counts->tx_size[0][i][TX_8X8]; - count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; - count16x16_lp += counts->tx.p32x32[i][TX_16X16]; - count32x32 += counts->tx.p32x32[i][TX_32X32]; + count16x16_16x16p += counts->tx_size[1][i][TX_16X16]; + count16x16_lp += counts->tx_size[2][i][TX_16X16]; + count32x32 += counts->tx_size[2][i][TX_32X32]; } if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && #if CONFIG_SUPERTX @@ -4521,8 +4520,8 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, if (is_inter_block(mbmi)) tx_partition_count_update(cm, xd, bsize, mi_row, mi_col, td->counts); #endif - ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), - &td->counts->tx)[mbmi->tx_size]; + ++td->counts->tx_size[max_txsize_lookup[bsize] - TX_8X8] + [get_tx_size_context(xd)][mbmi->tx_size]; } else { int x, y; TX_SIZE tx_size; @@ -4538,8 +4537,8 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, if (mi_col + x < cm->mi_cols && mi_row + y < cm->mi_rows) mi_8x8[mis * y + x]->mbmi.tx_size = tx_size; } - ++td->counts->tx.tx_totals[mbmi->tx_size]; - ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; + ++td->counts->tx_size_totals[mbmi->tx_size]; + ++td->counts->tx_size_totals[get_uv_tx_size(mbmi, &xd->plane[1])]; #if CONFIG_EXT_TX if (get_ext_tx_types(mbmi->tx_size, bsize, is_inter_block(mbmi)) > 1 && cm->base_qindex > 0 && !mbmi->skip && diff --git a/vp10/encoder/encoder.h b/vp10/encoder/encoder.h index efde0fc31..df721fd34 100644 --- a/vp10/encoder/encoder.h +++ b/vp10/encoder/encoder.h @@ -516,6 +516,7 @@ typedef struct VP10_COMP { [PALETTE_COLORS]; int palette_uv_color_cost[PALETTE_MAX_SIZE - 1][PALETTE_COLOR_CONTEXTS] [PALETTE_COLORS]; + int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; #if CONFIG_EXT_TX int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] diff --git a/vp10/encoder/rd.c b/vp10/encoder/rd.c index 3f60a1b7b..78e8e9a36 100644 --- a/vp10/encoder/rd.c +++ b/vp10/encoder/rd.c @@ -104,6 +104,12 @@ static void fill_mode_costs(VP10_COMP *cpi) { vp10_default_palette_uv_color_prob[i][j], vp10_palette_color_tree[i]); } + + for (i = 0; i < TX_SIZES - 1; ++i) + for (j = 0; j < TX_SIZE_CONTEXTS; ++j) + vp10_cost_tokens(cpi->tx_size_cost[i][j], fc->tx_size_probs[i][j], + vp10_tx_size_tree[i]); + #if CONFIG_EXT_TX for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { int s; diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 1416f3a29..ba864e40d 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1251,21 +1251,6 @@ static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_size, cpi->sf.use_fast_coef_costing); } -static INLINE int vp10_cost_tx_size(TX_SIZE tx_size, TX_SIZE max_tx_size, - const vpx_prob *tx_probs) { - int m; - int r_tx_size = 0; - - for (m = 0; m <= tx_size - (tx_size == max_tx_size); ++m) { - if (m == tx_size) - r_tx_size += vp10_cost_zero(tx_probs[m]); - else - r_tx_size += vp10_cost_one(tx_probs[m]); - } - - return r_tx_size; -} - static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, @@ -1288,7 +1273,6 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, int start_tx, end_tx; const int tx_select = cm->tx_mode == TX_MODE_SELECT; const int is_inter = is_inter_block(mbmi); - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); TX_TYPE tx_type, best_tx_type = DCT_DCT; int prune = 0; #if CONFIG_EXT_TX @@ -1320,7 +1304,8 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, for (tx_type = DCT_DCT; tx_type < TX_TYPES; ++tx_type) { last_rd = INT64_MAX; for (n = start_tx; n >= end_tx; --n) { - const int r_tx_size = vp10_cost_tx_size(n, max_tx_size, tx_probs); + const int r_tx_size = + cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)][n]; if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, n)) continue; #if CONFIG_EXT_TX @@ -2393,8 +2378,6 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, const PREDICTION_MODE A = vp10_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp10_left_block_mode(mic, left_mi, 0); const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, - &cpi->common.fc->tx_probs); bmode_costs = cpi->y_mode_costs[A][L]; #if CONFIG_EXT_INTRA @@ -2471,8 +2454,9 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, // tokenonly rate, but for intra blocks, tx_size is always coded // (prediction granularity), so we account for it in the full rate, // not the tokenonly rate. - this_rate_tokenonly -= vp10_cost_tx_size(mic->mbmi.tx_size, max_tx_size, - tx_probs); + this_rate_tokenonly -= + cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)] + [mic->mbmi.tx_size]; } if (cpi->common.allow_screen_content_tools && mode == DC_PRED) this_rate += @@ -6719,7 +6703,6 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); #if CONFIG_OBMC #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * 64 * 64]); @@ -7276,7 +7259,9 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, // tokenonly rate, but for intra blocks, tx_size is always coded // (prediction granularity), so we account for it in the full rate, // not the tokenonly rate. - rate_y -= vp10_cost_tx_size(mbmi->tx_size, max_tx_size, tx_probs); + rate_y -= + cpi->tx_size_cost[max_tx_size - TX_8X8][get_tx_size_context(xd)] + [mbmi->tx_size]; } #if CONFIG_EXT_INTRA if (is_directional_mode) {