From: Jingning Han Date: Thu, 8 Oct 2015 23:46:10 +0000 (-0700) Subject: Make chroma component RD estimate support transform partition X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a8dad55c8262fd7b03f1379ec31ce37cc6d599cc;p=libvpx Make chroma component RD estimate support transform partition This commit makes the rate-distortion optimization for chroma component support the recursive transform block coding scheme. Change-Id: I1bfed6d05b0ebb3905cb625222401e2ccbae10f3 --- diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index cdc30791e..3f42615a7 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -1370,6 +1370,9 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, uint8_t *dst; ENTROPY_CONTEXT *a, *l; TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block, tx_size); +#if CONFIG_VAR_TX + int i; +#endif dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; a = &ctx->ta[plane][blk_col]; l = &ctx->tl[plane][blk_row]; @@ -1421,12 +1424,41 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, } if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { - const int ctx = combine_entropy_contexts(*a, *l); + int ctx; +#if CONFIG_VAR_TX + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + a[0] = !!*(const uint16_t *)&a[0]; + l[0] = !!*(const uint16_t *)&l[0]; + break; + case TX_16X16: + a[0] = !!*(const uint32_t *)&a[0]; + l[0] = !!*(const uint32_t *)&l[0]; + break; + case TX_32X32: + a[0] = !!*(const uint64_t *)&a[0]; + l[0] = !!*(const uint64_t *)&l[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } +#endif + ctx = combine_entropy_contexts(*a, *l); *a = *l = optimize_b(x, plane, block, tx_size, ctx) > 0; } else { *a = *l = p->eobs[block] > 0; } +#if CONFIG_VAR_TX + for (i = 0; i < (1 << tx_size); ++i) { + a[i] = a[0]; + l[i] = l[0]; + } +#endif + if (p->eobs[block]) *(args->skip) = 0; @@ -1613,10 +1645,15 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { vp10_subtract_plane(x, bsize, plane); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { +#if CONFIG_VAR_TX + vp10_get_entropy_contexts(bsize, TX_4X4, pd, + ctx.ta[plane], ctx.tl[plane]); +#else const struct macroblockd_plane* const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size; vp10_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); +#endif } #if CONFIG_VAR_TX diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index b44b03630..fa6664254 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -1201,6 +1201,188 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, return best_rd; } +#if CONFIG_VAR_TX +static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = plane ? + get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) : + mbmi->inter_tx_size[tx_idx]; + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + const int ss_txfrm_size = tx_size << 1; + const struct macroblock_plane *const p = &x->plane[plane]; + int64_t this_sse; + int shift = tx_size == TX_32X32 ? 0 : 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + ENTROPY_CONTEXT *ta = above_ctx + blk_col; + ENTROPY_CONTEXT *tl = left_ctx + blk_row; + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + int i; + + vp10_xform_quant(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); + +#if CONFIG_VP9_HIGHBITDEPTH + *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse, xd->bd) >> shift; +#else + *dist += vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse) >> shift; +#endif // CONFIG_VP9_HIGHBITDEPTH + *bsse += this_sse >> shift; + + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + ta[0] = !!*(const uint16_t *)&ta[0]; + tl[0] = !!*(const uint16_t *)&tl[0]; + break; + case TX_16X16: + ta[0] = !!*(const uint32_t *)&ta[0]; + tl[0] = !!*(const uint32_t *)&tl[0]; + break; + case TX_32X32: + ta[0] = !!*(const uint64_t *)&ta[0]; + tl[0] = !!*(const uint64_t *)&tl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + + *rate += cost_coeffs(x, plane, block, ta, tl, tx_size, + scan_order->scan, scan_order->neighbors, 0); + + for (i = 0; i < (1 << tx_size); ++i) { + ta[i] = ta[0]; + tl[i] = tl[0]; + } + *skip &= (p->eobs[block] == 0); + } else { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bsl = b_width_log2_lookup[bsize]; + int step = 1 << (2 * (tx_size - 1)); + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) << bsl; + int offsetc = (i & 0x01) << bsl; + tx_block_rd(cpi, x, blk_row + offsetr, blk_col + offsetc, plane, + block + i * step, tx_size - 1, plane_bsize, + above_ctx, left_ctx, rate, dist, bsse, skip); + } + } +} + +// Return value 0: early termination triggered, no valid rd cost available; +// 1: rd cost values are valid. +static int inter_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + int plane; + int is_cost_valid = 1; + int64_t this_rd; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + if (is_inter_block(mbmi) && is_cost_valid) { + int plane; + for (plane = 1; plane < MAX_MB_PLANE; ++plane) + vp10_subtract_plane(x, bsize, plane); + } + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + for (plane = 1; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + ENTROPY_CONTEXT ta[16], tl[16]; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ta, tl); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + tx_block_rd(cpi, x, idy, idx, plane, block, + max_txsize_lookup[plane_bsize], plane_bsize, ta, tl, + &pnrate, &pndist, &pnsse, &pnskip); + block += step; + } + } + + if (pnrate == INT_MAX) { + is_cost_valid = 0; + break; + } + + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + + if (this_rd > ref_best_rd) { + is_cost_valid = 0; + break; + } + } + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } + + return is_cost_valid; +} +#endif + // Return value 0: early termination triggered, no valid rd cost available; // 1: rd cost values are valid. static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, @@ -2799,8 +2981,13 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); +#if CONFIG_VAR_TX + if (!inter_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, + &sseuv, bsize, ref_best_rd - rdcosty)) { +#else if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty)) { +#endif *rate2 = INT_MAX; *distortion = INT64_MAX; restore_dst_buf(xd, orig_dst, orig_dst_stride); @@ -4112,10 +4299,15 @@ void vp10_rd_pick_inter_mode_sub8x8(VP10_COMP *cpi, vp10_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); +#if CONFIG_VAR_TX + if (!inter_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, + &uv_sse, BLOCK_8X8, tmp_best_rdu)) + continue; +#else if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu)) continue; - +#endif rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable;