From: Jingning Han Date: Fri, 16 Oct 2015 06:11:30 +0000 (-0700) Subject: Support per transform block skip coding X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfeac5e19cfa6a083343545e20e99f72d31b1a53;p=libvpx Support per transform block skip coding Allow the encoder to drop individual transform block coding. Change-Id: I2c2b2985254cb92baf891f03daa33f067279373b --- diff --git a/vp10/encoder/block.h b/vp10/encoder/block.h index df3830cb5..55ec46eec 100644 --- a/vp10/encoder/block.h +++ b/vp10/encoder/block.h @@ -111,6 +111,9 @@ struct macroblock { // Notes transform blocks where no coefficents are coded. // Set during mode selection. Read during block encoding. uint8_t zcoeff_blk[TX_SIZES][256]; +#if CONFIG_VAR_TX + uint8_t blk_skip[MAX_MB_PLANE][256]; +#endif int skip; diff --git a/vp10/encoder/context_tree.c b/vp10/encoder/context_tree.c index 6c056d28e..1ac802f81 100644 --- a/vp10/encoder/context_tree.c +++ b/vp10/encoder/context_tree.c @@ -28,6 +28,10 @@ static void alloc_mode_context(VP10_COMMON *cm, int num_4x4_blk, CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t))); for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_VAR_TX + CHECK_MEM_ERROR(cm, ctx->blk_skip[i], + vpx_calloc(num_blk, sizeof(uint8_t))); +#endif for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k]))); @@ -50,6 +54,10 @@ static void free_mode_context(PICK_MODE_CONTEXT *ctx) { vpx_free(ctx->zcoeff_blk); ctx->zcoeff_blk = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_VAR_TX + vpx_free(ctx->blk_skip[i]); + ctx->blk_skip[i] = 0; +#endif for (k = 0; k < 3; ++k) { vpx_free(ctx->coeff[i][k]); ctx->coeff[i][k] = 0; diff --git a/vp10/encoder/context_tree.h b/vp10/encoder/context_tree.h index 2a0fffbfb..55ae471c5 100644 --- a/vp10/encoder/context_tree.h +++ b/vp10/encoder/context_tree.h @@ -28,6 +28,9 @@ typedef struct { MB_MODE_INFO_EXT mbmi_ext; uint8_t *zcoeff_blk; uint8_t *color_index_map[2]; +#if CONFIG_VAR_TX + uint8_t *blk_skip[MAX_MB_PLANE]; +#endif tran_low_t *coeff[MAX_MB_PLANE][3]; tran_low_t *qcoeff[MAX_MB_PLANE][3]; tran_low_t *dqcoeff[MAX_MB_PLANE][3]; diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index 784b2580e..44a57e346 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -1031,8 +1031,15 @@ static void update_state(VP10_COMP *cpi, ThreadData *td, } x->skip = ctx->skip; + +#if CONFIG_VAR_TX + for (i = 0; i < MAX_MB_PLANE; ++i) + memcpy(x->blk_skip[i], ctx->blk_skip[i], + sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk, sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif if (!output_enabled) return; @@ -2841,6 +2848,9 @@ static void encode_frame_internal(VP10_COMP *cpi) { x->quant_fp = cpi->sf.use_quant_fp; vp10_zero(x->skip_txfm); +#if CONFIG_VAR_TX + vp10_zero(x->blk_skip); +#endif { struct vpx_usec_timer emr_timer; @@ -3316,7 +3326,6 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, max_txsize_lookup[bsize]); else tx_size = (bsize >= BLOCK_8X8) ? mbmi->tx_size : TX_4X4; - mbmi->tx_size = tx_size; set_txfm_ctx(xd->left_txfm_context, tx_size, xd->n8_h); set_txfm_ctx(xd->above_txfm_context, tx_size, xd->n8_w); diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 966d05c4d..5f6e3b2af 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -1393,6 +1393,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); #if CONFIG_VAR_TX int i; + const int bwl = b_width_log2_lookup[plane_bsize]; #endif dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; a = &ctx->ta[plane][blk_col]; @@ -1408,7 +1409,12 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, // return; // } +#if CONFIG_VAR_TX + if (!x->skip_recode && + x->blk_skip[plane][(blk_row << bwl) + blk_col] == 0) { +#else if (!x->skip_recode) { +#endif if (x->quant_fp) { // Encoding process for rtc mode if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) { @@ -1435,7 +1441,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, // skip forward transform p->eobs[block] = 0; *a = *l = 0; +#if !CONFIG_VAR_TX return; +#endif } } else { vp10_xform_quant(x, plane, block, blk_row, blk_col, @@ -1443,6 +1451,12 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, } } } +#if CONFIG_VAR_TX + else { + if (!x->skip_recode) + p->eobs[block] = 0; + } +#endif if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { int ctx; diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 4a4362e00..0dbbd81ab 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1733,6 +1733,7 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, int sum_rate = vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 1); int all_skip = 1; int tmp_eob = 0; + int zero_blk_rate; if (ref_best_rd < 0) { *is_cost_valid = 0; @@ -1775,10 +1776,27 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; + zero_blk_rate = + x->token_costs[tx_size][pd->plane_type][1][0][0][coeff_ctx][EOB_TOKEN]; + if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { mbmi->inter_tx_size[tx_idx] = tx_size; tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, coeff_ctx, rate, dist, bsse, skip); + + if (RDCOST(x->rdmult, x->rddiv, *rate, *dist) >= + RDCOST(x->rdmult, x->rddiv, zero_blk_rate, *bsse) && (*skip == 0) && + !xd->lossless[mbmi->segment_id]) { + *rate = zero_blk_rate; + *dist = *bsse; + *skip = 1; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 1; + p->eobs[block] = 0; + } else { + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = 0; + *skip = 0; + } + if (tx_size > TX_4X4) *rate += vp10_cost_bit(cpi->common.fc->txfm_partition_prob[ctx], 0); this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist); @@ -1833,6 +1851,7 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = tx_size; if (this_rd == INT64_MAX) *is_cost_valid = 0; + x->blk_skip[plane][blk_row * max_blocks_wide + blk_col] = *skip; } else { *rate = sum_rate; *dist = sum_dist; @@ -4368,8 +4387,15 @@ void vp10_rd_pick_inter_mode_sb(VP10_COMP *cpi, if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); + +#if CONFIG_VAR_TX + for (i = 0; i < MAX_MB_PLANE; ++i) + memcpy(ctx->blk_skip[i], x->blk_skip[i], + sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mbmi->tx_size], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history @@ -5120,8 +5146,14 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, best_skip2 = this_skip2; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); + +#if CONFIG_VAR_TX + for (i = 0; i < MAX_MB_PLANE; ++i) + memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk); +#else memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); +#endif for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i];