From 432e875dce65574a3a401db7bbfd9c671e6ce7dd Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Fri, 22 Jan 2016 13:57:28 +0000 Subject: [PATCH] Complete high bitdepth VAR_TX implementation. VAR_TX now works in the high bitdepth configuration. Change-Id: I4114d7d9ed59c598f1e4d35b8e75876c07074ba7 --- vp10/encoder/rdopt.c | 126 ++++++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 56 deletions(-) diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 9e29ce6db..852d1d5aa 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -652,13 +652,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, return; if (!is_inter_block(mbmi)) { -#if CONFIG_VAR_TX struct encode_b_args arg = {x, NULL, &mbmi->skip}; -#if CONFIG_VP9_HIGHBITDEPTH - vp10_encode_block_intra(plane, block, blk_row, blk_col, - plane_bsize, tx_size, &arg); - dist_block(x, plane, block, tx_size, &dist, &sse); -#else +#if CONFIG_VAR_TX uint8_t *dst, *src; int src_stride = x->plane[plane].src.stride; int dst_stride = xd->plane[plane].dst.stride; @@ -680,9 +675,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride, dst, dst_stride, &tmp_sse); dist = (int64_t)tmp_sse * 16; -#endif // CONFIG_VP9_HIGHBITDEPTH #else - struct encode_b_args arg = {x, NULL, &mbmi->skip}; vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); @@ -1487,18 +1480,20 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_VAR_TX const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), *(templ + idy)); -#endif +#endif // CONFIG_VAR_TX vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, #if CONFIG_VAR_TX - coeff_ctx, + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); #else - tempa + idx, templ + idy, -#endif + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); +#endif // CONFIG_VAR_TX if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next_highbd; vp10_highbd_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), @@ -1511,18 +1506,19 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, #if CONFIG_VAR_TX const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), *(templ + idy)); -#endif +#endif // CONFIG_VAR_TX vp10_highbd_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, #if CONFIG_VAR_TX - coeff_ctx, + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); #else - tempa + idx, templ + idy, -#endif - TX_4X4, - so->scan, so->neighbors, + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); +#endif // CONFIG_VAR_TX distortion += vp10_highbd_block_error( coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused, xd->bd) >> 2; @@ -1555,6 +1551,7 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, next_highbd: {} } + if (best_rd >= rd_thresh) return best_rd; @@ -1604,8 +1601,8 @@ next_highbd: TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); #if CONFIG_VAR_TX - int coeff_ctx = combine_entropy_contexts(*(tempa + idx), - *(templ + idy)); + const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); #endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); @@ -1629,8 +1626,8 @@ next_highbd: TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); #if CONFIG_VAR_TX - int coeff_ctx = combine_entropy_contexts(*(tempa + idx), - *(templ + idy)); + const int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); #endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); @@ -2321,12 +2318,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, MACROBLOCKD *xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; -#if CONFIG_VP9_HIGHBITDEPTH - const int ss_txfrm_size = tx_size << 1; - int64_t this_sse; - int shift = tx_size == TX_32X32 ? 0 : 2; - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); -#endif unsigned int tmp_sse = 0; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; @@ -2391,35 +2382,59 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse); } -#if CONFIG_VP9_HIGHBITDEPTH - *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, - &this_sse, xd->bd) >> shift; - *bsse += this_sse >> shift; -#else *bsse += (int64_t)tmp_sse * 16; if (p->eobs[block] > 0) { - switch (tx_size) { - case TX_32X32: - vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block], - tx_type); - break; - case TX_16X16: - vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block], + const int lossless = xd->lossless[xd->mi[0]->mbmi.segment_id]; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + const int bd = xd->bd; + switch (tx_size) { + case TX_32X32: + vp10_highbd_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, + p->eobs[block], bd, tx_type); + break; + case TX_16X16: + vp10_highbd_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, + p->eobs[block], bd, tx_type); + break; + case TX_8X8: + vp10_highbd_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, + p->eobs[block], bd, tx_type); + break; + case TX_4X4: + vp10_highbd_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, + p->eobs[block], bd, tx_type, lossless); + break; + default: + assert(0 && "Invalid transform size"); + break; + } + } else { +#else + { +#endif // CONFIG_VP9_HIGHBITDEPTH + switch (tx_size) { + case TX_32X32: + vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_16X16: + vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_8X8: + vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block], tx_type); - break; - case TX_8X8: - vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block], - tx_type); - break; - case TX_4X4: - vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block], - tx_type, - xd->lossless[xd->mi[0]->mbmi.segment_id]); - break; - default: - assert(0 && "Invalid transform size"); - break; + break; + case TX_4X4: + vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type, lossless); + break; + default: + assert(0 && "Invalid transform size"); + break; + } } if ((bh >> 2) + blk_col > max_blocks_wide || @@ -2444,7 +2459,6 @@ void vp10_tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, } } *dist += (int64_t)tmp_sse * 16; -#endif // CONFIG_VP9_HIGHBITDEPTH *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size, scan_order->scan, scan_order->neighbors, 0); -- 2.40.0