From d4b8dd76c47afcb38ed91e6af71fb6487cea6f2e Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 21 Apr 2015 15:34:39 -0700 Subject: [PATCH] Make chroma component RD estimate support transform partition This commit makes the rate-distortion estimation of the chroma components support the recursive transform block partition inferred from the luma component mode decisions. Change-Id: I2e038bebf558da406e966015952ad1058bdf4766 --- vp9/encoder/vp9_encodemb.c | 36 ++++++-- vp9/encoder/vp9_rdopt.c | 171 ++++++++++++++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 8 deletions(-) diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 0604b2cf4..33215b327 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -719,6 +719,7 @@ static void encode_block_b(int blk_row, int blk_col, int plane, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint8_t *dst; ENTROPY_CONTEXT *a, *l; + int i; dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; a = &ctx->ta[plane][blk_col]; l = &ctx->tl[plane][blk_row]; @@ -735,12 +736,37 @@ static void encode_block_b(int blk_row, int blk_col, int plane, plane_bsize, tx_size); if (x->optimize) { - const int ctx = combine_entropy_contexts(*a, *l); - *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; + int context; + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + a[0] = !!*(const uint16_t *)&a[0]; + l[0] = !!*(const uint16_t *)&l[0]; + break; + case TX_16X16: + a[0] = !!*(const uint32_t *)&a[0]; + l[0] = !!*(const uint32_t *)&l[0]; + break; + case TX_32X32: + a[0] = !!*(const uint64_t *)&a[0]; + l[0] = !!*(const uint64_t *)&l[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + context = combine_entropy_contexts(*a, *l); + *a = *l = optimize_b(x, plane, block, tx_size, context) > 0; } else { *a = *l = p->eobs[block] > 0; } + for (i = 0; i < (1 << tx_size); ++i) { + a[i] = a[0]; + l[i] = l[0]; + } + if (p->eobs[block]) *(args->skip) = 0; @@ -893,11 +919,7 @@ void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { vp9_subtract_plane(x, bsize, plane); - if (x->optimize) { - const struct macroblockd_plane* const pd = &xd->plane[plane]; - vp9_get_entropy_contexts(bsize, TX_4X4, pd, - ctx.ta[plane], ctx.tl[plane]); - } + vp9_get_entropy_contexts(bsize, TX_4X4, pd, ctx.ta[plane], ctx.tl[plane]); for (idy = 0; idy < mi_height; idy += bh) { for (idx = 0; idx < mi_width; idx += bh) { diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 0afe5537c..a8cbc0a86 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1136,8 +1136,177 @@ static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, return best_rd; } +static void tx_block_rd(const VP9_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? + get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) : + mbmi->inter_tx_size[tx_idx]; + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + const int ss_txfrm_size = tx_size << 1; + const struct macroblock_plane *const p = &x->plane[plane]; + int64_t this_sse; + int shift = tx_size == TX_32X32 ? 0 : 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + ENTROPY_CONTEXT *ta = above_ctx + blk_col; + ENTROPY_CONTEXT *tl = left_ctx + blk_row; + scan_order const *sc = get_scan(xd, tx_size, pd->plane_type, 0); + int i; + + vp9_xform_quant_inter(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); + +#if CONFIG_VP9_HIGHBITDEPTH + *dist += vp9_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse, xd->bd) >> shift; +#else + *dist += vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse) >> shift; +#endif // CONFIG_VP9_HIGHBITDEPTH + *bsse += this_sse >> shift; + + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + ta[0] = !!*(const uint16_t *)&ta[0]; + tl[0] = !!*(const uint16_t *)&tl[0]; + break; + case TX_16X16: + ta[0] = !!*(const uint32_t *)&ta[0]; + tl[0] = !!*(const uint32_t *)&tl[0]; + break; + case TX_32X32: + ta[0] = !!*(const uint64_t *)&ta[0]; + tl[0] = !!*(const uint64_t *)&tl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + + *rate += cost_coeffs(x, plane, block, ta, tl, tx_size, + sc->scan, sc->neighbors, 0); + + for (i = 0; i < (1 << tx_size); ++i) { + ta[i] = ta[0]; + tl[i] = tl[0]; + } + *skip &= (p->eobs[block] == 0); + } else { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bh = num_4x4_blocks_high_lookup[bsize]; + int step = 1 << (2 *(tx_size - 1)); + int i; + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) * bh / 2; + int offsetc = (i & 0x01) * bh / 2; + tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane, + block + i * step, tx_size - 1, plane_bsize, + above_ctx, left_ctx, rate, dist, bsse, skip); + } + } +} + // Return value 0: early termination triggered, no valid rd cost available; // 1: rd cost values are valid. +static int inter_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; + int plane; + int is_cost_valid = 1; + int64_t this_rd; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + if (is_inter_block(mbmi) && is_cost_valid) { + int plane; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) + vp9_subtract_plane(x, bsize, plane); + } + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + ENTROPY_CONTEXT ta[16], tl[16]; + + vp9_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + tx_block_rd(cpi, x, idy, idx, plane, block, + max_txsize_lookup[plane_bsize], plane_bsize, ta, tl, + &pnrate, &pndist, &pnsse, &pnskip); + block += step; + } + } + + if (pnrate == INT_MAX) { + is_cost_valid = 0; + break; + } + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + + this_rd = MIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + if (this_rd > ref_best_rd) { + is_cost_valid = 0; + break; + } + } + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } + + return is_cost_valid; +} + static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, @@ -2745,7 +2914,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = MIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); - if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, + if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty)) { *rate2 = INT_MAX; *distortion = INT64_MAX; -- 2.49.0