From: Jingning Han Date: Tue, 13 Oct 2015 19:40:39 +0000 (-0700) Subject: Use precise distortion metric X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=71c156070c9fac818bb6d662e760408b9b986741;p=libvpx Use precise distortion metric Rework the rate distortion optimization pipeline. Use precise distortion metric that accounts for the forward and inverse transform rounding effect. Change-Id: Ibe19ce9791ec3547739294cc3012dd9e11f4ea49 --- diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index fe34c6138..223c67ae2 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -69,6 +69,9 @@ typedef struct { } REF_DEFINITION; struct rdcost_block_args { +#if CONFIG_VAR_TX + const VP10_COMP *cpi; +#endif MACROBLOCK *x; ENTROPY_CONTEXT t_above[16]; ENTROPY_CONTEXT t_left[16]; @@ -504,10 +507,41 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, return; if (!is_inter_block(mbmi)) { +#if CONFIG_VAR_TX + struct encode_b_args arg = {x, NULL, &mbmi->skip}; + uint8_t *dst, *src; + int src_stride = x->plane[plane].src.stride; + int dst_stride = xd->plane[plane].dst.stride; + unsigned int tmp_sse; + PREDICTION_MODE mode = (plane == 0) ? + get_y_mode(xd->mi[0], block) : mbmi->uv_mode; + +#if CONFIG_VP9_HIGHBITDEPTH + vp10_encode_block_intra(plane, block, blk_row, blk_col, + plane_bsize, tx_size, &arg); + dist_block(x, plane, block, tx_size, &dist, &sse); +#else + src = &x->plane[plane].src.buf[4 * (blk_row * src_stride + blk_col)]; + dst = &xd->plane[plane].dst.buf[4 * (blk_row * dst_stride + blk_col)]; + vp10_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], + b_height_log2_lookup[plane_bsize], + tx_size, mode, dst, dst_stride, + dst, dst_stride, blk_col, blk_row, plane); + args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride, + dst, dst_stride, &tmp_sse); + sse = (int64_t)tmp_sse * 16; + vp10_encode_block_intra(plane, block, blk_row, blk_col, + plane_bsize, tx_size, &arg); + args->cpi->fn_ptr[txsize_to_bsize[tx_size]].vf(src, src_stride, + dst, dst_stride, &tmp_sse); + dist = (int64_t)tmp_sse * 16; +#endif // CONFIG_VP9_HIGHBITDEPTH +#else struct encode_b_args arg = {x, NULL, &mbmi->skip}; vp10_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); +#endif } else if (max_txsize_lookup[plane_bsize] == tx_size) { if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_NONE) { @@ -579,6 +613,9 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, } static void txfm_rd_in_plane(MACROBLOCK *x, +#if CONFIG_VAR_TX + const VP10_COMP *cpi, +#endif int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, @@ -590,6 +627,9 @@ static void txfm_rd_in_plane(MACROBLOCK *x, struct rdcost_block_args args; vp10_zero(args); args.x = x; +#if CONFIG_VAR_TX + args.cpi = cpi; +#endif args.best_rd = ref_best_rd; args.use_fast_coef_costing = use_fast_coef_casting; args.skippable = 1; @@ -650,7 +690,11 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, continue; mbmi->tx_type = tx_type; - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &psse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); @@ -681,7 +725,11 @@ static void choose_largest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_type = best_tx_type; #endif // CONFIG_EXT_TX - txfm_rd_in_plane(x, rate, distortion, skip, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); @@ -707,7 +755,11 @@ static void choose_smallest_tx_size(VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = TX_4X4; - txfm_rd_in_plane(x, rate, distortion, skip, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + rate, distortion, skip, sse, ref_best_rd, 0, bs, mbmi->tx_size, cpi->sf.use_fast_coef_costing); } @@ -789,7 +841,11 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, r_tx_size += vp10_cost_one(tx_probs[m]); } - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing); #if CONFIG_EXT_TX @@ -856,7 +912,11 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = best_tx; #if CONFIG_EXT_TX mbmi->tx_type = best_tx_type; - txfm_rd_in_plane(x, &r, &d, &s, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx, cpi->sf.use_fast_coef_costing); #endif // CONFIG_EXT_TX @@ -1492,33 +1552,121 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, } #if CONFIG_VAR_TX -static void tx_block_rd_b(MACROBLOCK *x, TX_SIZE tx_size, +static void tx_block_rd_b(const VP10_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, int blk_row, int blk_col, int plane, int block, int plane_bsize, int coeff_ctx, int *rate, int64_t *dist, int64_t *bsse, int *skip) { MACROBLOCKD *xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; +#if CONFIG_VP9_HIGHBITDEPTH const int ss_txfrm_size = tx_size << 1; int64_t this_sse; int shift = tx_size == TX_32X32 ? 0 : 2; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); +#endif + unsigned int tmp_sse = 0; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); const scan_order *const scan_order = get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + BLOCK_SIZE txm_bsize = txsize_to_bsize[tx_size]; + int bh = 4 * num_4x4_blocks_wide_lookup[txm_bsize]; + int src_stride = p->src.stride; + uint8_t *src = &p->src.buf[4 * blk_row * src_stride + 4 * blk_col]; + uint8_t *dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; + DECLARE_ALIGNED(16, uint8_t, rec_buffer[32 * 32]); + + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); + vpx_convolve_copy(dst, pd->dst.stride, rec_buffer, 32, + NULL, 0, NULL, 0, bh, bh); + + if (blk_row + (bh >> 2) > max_blocks_high || + blk_col + (bh >> 2) > max_blocks_wide) { + int idx, idy; + unsigned int this_sse; + int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, rec_buffer, 32, &tmp_sse); + } + #if CONFIG_VP9_HIGHBITDEPTH *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, xd->bd) >> shift; + *bsse += this_sse >> shift; #else - *dist += vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, - &this_sse) >> shift; + *bsse += (int64_t)tmp_sse * 16; + + if (p->eobs[block] > 0) { + // TODO(jingning): integrate multiple transform type experiment + TX_TYPE tx_type = DCT_DCT; + switch (tx_size) { + case TX_32X32: + vp10_inv_txfm_add_32x32(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_16X16: + vp10_inv_txfm_add_16x16(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_8X8: + vp10_inv_txfm_add_8x8(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type); + break; + case TX_4X4: + vp10_inv_txfm_add_4x4(dqcoeff, rec_buffer, 32, p->eobs[block], + tx_type, + xd->lossless[xd->mi[0]->mbmi.segment_id]); + break; + default: + assert(0 && "Invalid transform size"); + break; + } + + if ((bh >> 2) + blk_col > max_blocks_wide || + (bh >> 2) + blk_row > max_blocks_high) { + int idx, idy; + unsigned int this_sse; + int blocks_height = VPXMIN(bh >> 2, max_blocks_high - blk_row); + int blocks_width = VPXMIN(bh >> 2, max_blocks_wide - blk_col); + tmp_sse = 0; + for (idy = 0; idy < blocks_height; idy += 2) { + for (idx = 0; idx < blocks_width; idx += 2) { + cpi->fn_ptr[BLOCK_8X8].vf(src + 4 * idy * src_stride + 4 * idx, + src_stride, + rec_buffer + 4 * idy * 32 + 4 * idx, + 32, &this_sse); + tmp_sse += this_sse; + } + } + } else { + cpi->fn_ptr[txm_bsize].vf(src, src_stride, + rec_buffer, 32, &tmp_sse); + } + } + *dist += (int64_t)tmp_sse * 16; #endif // CONFIG_VP9_HIGHBITDEPTH - *bsse += this_sse >> shift; *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size, scan_order->scan, scan_order->neighbors, 0); @@ -1594,7 +1742,7 @@ static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { mbmi->inter_tx_size[tx_idx] = tx_size; - tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block, + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, coeff_ctx, rate, dist, bsse, skip); if (tx_size > TX_4X4) *rate += vp10_cost_bit(128, 0); @@ -1777,7 +1925,7 @@ static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, break; } coeff_ctx = combine_entropy_contexts(ta[0], tl[0]); - tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block, + tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, coeff_ctx, rate, dist, bsse, skip); for (i = 0; i < (1 << tx_size); ++i) { ta[i] = !(p->eobs[block] == 0); @@ -1912,7 +2060,11 @@ static int super_block_uvrd(const VP10_COMP *cpi, MACROBLOCK *x, *skippable = 1; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - txfm_rd_in_plane(x, &pnrate, &pndist, &pnskip, &pnsse, + txfm_rd_in_plane(x, +#if CONFIG_VAR_TX + cpi, +#endif + &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing); if (pnrate == INT_MAX) {