From 2cdc12742d3b0f23bb9e7a171c7a1ab4d0b74d1f Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Fri, 9 Oct 2015 09:57:42 -0700 Subject: [PATCH] Rate-distortion optimization for recursive transform block coding This commit enables the rate-distortion optimization for recursive transform block coding scheme. Change-Id: Id6a8336ca847bb3af1e94cbfb51db1f4da12d38f --- vp10/common/pred_common.h | 64 +++++++ vp10/decoder/decodeframe.c | 5 +- vp10/decoder/decodemv.c | 4 +- vp10/encoder/encodeframe.c | 10 ++ vp10/encoder/encodemb.c | 10 +- vp10/encoder/rdopt.c | 356 ++++++++++++++++++++++++++++++++----- vp10/encoder/tokenize.c | 8 +- 7 files changed, 395 insertions(+), 62 deletions(-) diff --git a/vp10/common/pred_common.h b/vp10/common/pred_common.h index 1b55f5384..571783e92 100644 --- a/vp10/common/pred_common.h +++ b/vp10/common/pred_common.h @@ -165,6 +165,70 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, } } +#if CONFIG_VAR_TX +static void update_tx_counts(VP10_COMMON *cm, MACROBLOCKD *xd, + MB_MODE_INFO *mbmi, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, int blk_row, int blk_col, + TX_SIZE max_tx_size, int ctx, + struct tx_counts *tx_counts) { + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_idx]; + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + if (tx_size == plane_tx_size) { + ++get_tx_counts(max_tx_size, ctx, tx_counts)[tx_size]; + } else { + int bsl = b_width_log2_lookup[bsize]; + int i; + + assert(bsl > 0); + --bsl; + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + ((i >> 1) << bsl); + const int offsetc = blk_col + ((i & 0x01) << bsl); + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) + continue; + update_tx_counts(cm, xd, mbmi, plane_bsize, + tx_size - 1, offsetr, offsetc, + max_tx_size, ctx, tx_counts); + } + } +} + +static INLINE void inter_block_tx_count_update(VP10_COMMON *cm, + MACROBLOCKD *xd, + MB_MODE_INFO *mbmi, + BLOCK_SIZE plane_bsize, + int ctx, + struct tx_counts *tx_counts) { + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + + for (idy = 0; idy < mi_height; idy += bh) + for (idx = 0; idx < mi_width; idx += bh) + update_tx_counts(cm, xd, mbmi, plane_bsize, max_tx_size, idy, idx, + max_tx_size, ctx, tx_counts); +} +#endif + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c index 80c07dee4..565de30a6 100644 --- a/vp10/decoder/decodeframe.c +++ b/vp10/decoder/decodeframe.c @@ -390,11 +390,11 @@ static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r, int block, int blk_row, int blk_col, TX_SIZE tx_size, int *eob_total) { const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + (blk_col >> (1 - pd->subsampling_x)); TX_SIZE plane_tx_size = plane ? - get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], mbmi->sb_type, - pd->subsampling_x, pd->subsampling_y) : + get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, 0, 0) : mbmi->inter_tx_size[tx_idx]; int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; @@ -419,7 +419,6 @@ static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r, pd->dst.stride, eob, block); *eob_total += eob; } else { - const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int bsl = b_width_log2_lookup[bsize]; int i; diff --git a/vp10/decoder/decodemv.c b/vp10/decoder/decodemv.c index 870ab154f..71d622cfa 100644 --- a/vp10/decoder/decodemv.c +++ b/vp10/decoder/decodemv.c @@ -668,7 +668,8 @@ static void read_inter_frame_mode_info(VP10Decoder *const pbi, idy, idx, r); if (xd->counts) { const int ctx = get_tx_size_context(xd); - ++get_tx_counts(max_tx_size, ctx, &xd->counts->tx)[mbmi->tx_size]; + inter_block_tx_count_update(cm, xd, mbmi, bsize, + ctx, &xd->counts->tx); } } else { mbmi->tx_size = read_tx_size(cm, xd, !mbmi->skip || !inter_block, r); @@ -731,7 +732,6 @@ void vp10_read_mode_info(VP10Decoder *const pbi, MACROBLOCKD *xd, read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r); } else { read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r); - for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c index dbe1ec682..83a1645a3 100644 --- a/vp10/encoder/encodeframe.c +++ b/vp10/encoder/encodeframe.c @@ -2974,8 +2974,18 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td, if (cm->tx_mode == TX_MODE_SELECT && mbmi->sb_type >= BLOCK_8X8 && !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) { +#if CONFIG_VAR_TX + int tx_size_ctx = get_tx_size_context(xd); + if (is_inter_block(mbmi)) + inter_block_tx_count_update(cm, xd, mbmi, bsize, + tx_size_ctx, &td->counts->tx); + else + ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), + &td->counts->tx)[mbmi->tx_size]; +#else ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), &td->counts->tx)[mbmi->tx_size]; +#endif } else { int x, y; TX_SIZE tx_size; diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index 3f42615a7..5266b3098 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -1530,12 +1530,13 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col, MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int blk_idx = ((blk_row >> 1) << 3) + (blk_col >> 1); + int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); TX_SIZE plane_tx_size = plane ? - get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], mbmi->sb_type, - pd->subsampling_x, pd->subsampling_y) : + get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, + 0, 0) : mbmi->inter_tx_size[blk_idx]; int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; @@ -1553,7 +1554,6 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col, encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); } else { - const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int bsl = b_width_log2_lookup[bsize]; int i; diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index fa6664254..bc142e93f 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -338,7 +338,11 @@ static const int16_t band_counts[TX_SIZES][8] = { }; static int cost_coeffs(MACROBLOCK *x, int plane, int block, +#if CONFIG_VAR_TX + int coeff_ctx, +#else ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L, +#endif TX_SIZE tx_size, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { @@ -353,7 +357,11 @@ static int cost_coeffs(MACROBLOCK *x, unsigned int (*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mbmi)]; uint8_t token_cache[32 * 32]; +#if CONFIG_VAR_TX + int pt = coeff_ctx; +#else int pt = combine_entropy_contexts(*A, *L); +#endif int c, cost; #if CONFIG_VP9_HIGHBITDEPTH const int16_t *cat6_high_cost = vp10_get_high_cost_table(xd->bd); @@ -361,9 +369,11 @@ static int cost_coeffs(MACROBLOCK *x, const int16_t *cat6_high_cost = vp10_get_high_cost_table(8); #endif +#if !CONFIG_VAR_TX // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? mbmi->tx_size == tx_size : get_uv_tx_size(mbmi, pd) == tx_size); +#endif if (eob == 0) { // single eob token @@ -417,8 +427,10 @@ static int cost_coeffs(MACROBLOCK *x, } } +#if !CONFIG_VAR_TX // is eob first coefficient; *A = *L = (c > 0); +#endif return cost; } @@ -446,10 +458,23 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, static int rate_block(int plane, int block, int blk_row, int blk_col, TX_SIZE tx_size, struct rdcost_block_args* args) { - return cost_coeffs(args->x, plane, block, args->t_above + blk_col, - args->t_left + blk_row, tx_size, - args->so->scan, args->so->neighbors, +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(args->t_above + blk_col), + *(args->t_left + blk_row)); + int coeff_cost = cost_coeffs(args->x, plane, block, coeff_ctx, + tx_size, args->so->scan, args->so->neighbors, + args->use_fast_coef_costing); + const struct macroblock_plane *p = &args->x->plane[plane]; + *(args->t_above + blk_col) = !(p->eobs[block] == 0); + *(args->t_left + blk_row) = !(p->eobs[block] == 0); + return coeff_cost; +#else + return cost_coeffs(args->x, plane, block, + args->t_above + blk_col, + args->t_left + blk_row, + tx_size, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); +#endif } static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, @@ -1017,11 +1042,23 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, if (xd->lossless) { TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, DCT_DCT, 1); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, +#if CONFIG_VAR_TX + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); +#else + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); +#endif if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp10_inv_txfm_add_4x4(BLOCK_OFFSET(pd->dqcoeff, block), @@ -1030,11 +1067,22 @@ static int64_t rd_pick_intra4x4block(VP10_COMP *cpi, MACROBLOCK *x, int64_t unused; TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, TX_4X4); const scan_order *so = get_scan(TX_4X4, tx_type, 0); +#if CONFIG_VAR_TX + int coeff_ctx = combine_entropy_contexts(*(tempa + idx), + *(templ + idy)); +#endif vp10_fwd_txfm_4x4(src_diff, coeff, 8, tx_type, 0); vp10_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); - ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, TX_4X4, - so->scan, so->neighbors, - cpi->sf.use_fast_coef_costing); +#if CONFIG_VAR_TX + ratey += cost_coeffs(x, 0, block, coeff_ctx, TX_4X4, so->scan, + so->neighbors, cpi->sf.use_fast_coef_costing); + *(tempa + idx) = !(p->eobs[block] == 0); + *(templ + idy) = !(p->eobs[block] == 0); +#else + ratey += cost_coeffs(x, 0, block, tempa + idx, templ + idy, + TX_4X4, so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); +#endif distortion += vp10_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) @@ -1202,6 +1250,219 @@ static int64_t rd_pick_intra_sby_mode(VP10_COMP *cpi, MACROBLOCK *x, } #if CONFIG_VAR_TX +static void tx_block_rd_b(MACROBLOCK *x, TX_SIZE tx_size, + int blk_row, int blk_col, int plane, int block, + int plane_bsize, int coeff_ctx, + int *rate, int64_t *dist, int64_t *bsse, int *skip) { + MACROBLOCKD *xd = &x->e_mbd; + const struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int ss_txfrm_size = tx_size << 1; + int64_t this_sse; + int shift = tx_size == TX_32X32 ? 0 : 2; + tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; + TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const scan_order *const scan_order = + get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); + + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); + +#if CONFIG_VP9_HIGHBITDEPTH + *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse, xd->bd) >> shift; +#else + *dist += vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, + &this_sse) >> shift; +#endif // CONFIG_VP9_HIGHBITDEPTH + *bsse += this_sse >> shift; + + *rate += cost_coeffs(x, plane, block, coeff_ctx, tx_size, + scan_order->scan, scan_order->neighbors, 0); + *skip &= (p->eobs[block] == 0); +} + +static void select_tx_block(const VP10_COMP *cpi, MACROBLOCK *x, + int blk_row, int blk_col, int plane, int block, + TX_SIZE tx_size, BLOCK_SIZE plane_bsize, + BLOCK_SIZE txb_bsize, + ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, + int *rate, int64_t *dist, + int64_t *bsse, int *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); + int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; + int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize]; + int64_t this_rd = INT64_MAX; + ENTROPY_CONTEXT ctxa[16], ctxl[16]; + ENTROPY_CONTEXT *pta = ta + blk_col; + ENTROPY_CONTEXT *ptl = tl + blk_row; + int coeff_ctx, i; + + memcpy(ctxa, ta, sizeof(ENTROPY_CONTEXT) * max_blocks_wide); + memcpy(ctxl, tl, sizeof(ENTROPY_CONTEXT) * max_blocks_high); + + switch (tx_size) { + case TX_4X4: + break; + case TX_8X8: + pta[0] = !!*(const uint16_t *)&pta[0]; + ptl[0] = !!*(const uint16_t *)&ptl[0]; + break; + case TX_16X16: + pta[0] = !!*(const uint32_t *)&pta[0]; + ptl[0] = !!*(const uint32_t *)&ptl[0]; + break; + case TX_32X32: + pta[0] = !!*(const uint64_t *)&pta[0]; + ptl[0] = !!*(const uint64_t *)&ptl[0]; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + coeff_ctx = combine_entropy_contexts(pta[0], ptl[0]); + + if (xd->mb_to_bottom_edge < 0) + max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y); + if (xd->mb_to_right_edge < 0) + max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x); + + *rate = 0; + *dist = 0; + *bsse = 0; + *skip = 1; + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) + return; + + mbmi->inter_tx_size[tx_idx] = tx_size; + mbmi->tx_size = tx_size; + + if (cpi->common.tx_mode == TX_MODE_SELECT || tx_size == TX_4X4) { + tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); + if (tx_size > TX_4X4) + *rate += vp10_cost_bit(128, 0); + this_rd = RDCOST(x->rdmult, x->rddiv, *rate, *dist); + for (i = 0; i < (1 << tx_size); ++i) { + pta[i] = !(p->eobs[block] == 0); + ptl[i] = !(p->eobs[block] == 0); + } + } + + if (tx_size > TX_4X4) { + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int bh = num_4x4_blocks_high_lookup[bsize]; + int sub_step = 1 << (2 * (tx_size - 1)); + int i; + int this_rate, sum_rate = vp10_cost_bit(128, 1); + int64_t this_dist, sum_dist = 0; + int64_t this_bsse, sum_bsse = 0; + int this_skip, all_skip = 1; + int64_t sum_rd; + for (i = 0; i < 4; ++i) { + int offsetr = (i >> 1) * bh / 2; + int offsetc = (i & 0x01) * bh / 2; + select_tx_block(cpi, x, blk_row + offsetr, blk_col + offsetc, + plane, block + i * sub_step, tx_size - 1, + plane_bsize, txsize_to_bsize[tx_size - 1], + ctxa, ctxl, &this_rate, &this_dist, + &this_bsse, &this_skip); + sum_rate += this_rate; + sum_dist += this_dist; + sum_bsse += this_bsse; + all_skip &= this_skip; + } + sum_rd = RDCOST(x->rdmult, x->rddiv, sum_rate, sum_dist); + + if (this_rd < sum_rd) { + int idx, idy; + for (idy = blk_row; idy < blk_row + bh; idy += 2) + for (idx = blk_col; idx < blk_col + bh; idx += 2) + mbmi->inter_tx_size[(idy / 2) * 8 + (idx / 2)] = tx_size; + mbmi->tx_size = tx_size; + } else { + *rate = sum_rate; + *dist = sum_dist; + *bsse = sum_bsse; + *skip = all_skip; + + memcpy(pta, ctxa + (blk_col >> pd->subsampling_x), + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[txb_bsize]); + memcpy(ptl, ctxl + (blk_row >> pd->subsampling_y), + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[txb_bsize]); + } + } +} + +static void inter_block_yrd(const VP10_COMP *cpi, MACROBLOCK *x, + int *rate, int64_t *distortion, int *skippable, + int64_t *sse, BLOCK_SIZE bsize, + int64_t ref_best_rd) { + MACROBLOCKD *const xd = &x->e_mbd; + int is_cost_valid = 1; + int64_t this_rd; + + if (ref_best_rd < 0) + is_cost_valid = 0; + + *rate = 0; + *distortion = 0; + *sse = 0; + *skippable = 1; + + if (is_cost_valid) { + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize]; + const int mi_height = num_4x4_blocks_high_lookup[plane_bsize]; + BLOCK_SIZE txb_size = txsize_to_bsize[max_txsize_lookup[plane_bsize]]; + int bh = num_4x4_blocks_wide_lookup[txb_size]; + int idx, idy; + int block = 0; + int step = 1 << (max_txsize_lookup[plane_bsize] * 2); + ENTROPY_CONTEXT ctxa[16], ctxl[16]; + + int pnrate = 0, pnskip = 1; + int64_t pndist = 0, pnsse = 0; + + vp10_get_entropy_contexts(bsize, TX_4X4, pd, ctxa, ctxl); + + for (idy = 0; idy < mi_height; idy += bh) { + for (idx = 0; idx < mi_width; idx += bh) { + select_tx_block(cpi, x, idy, idx, 0, block, + max_txsize_lookup[plane_bsize], plane_bsize, txb_size, + ctxa, ctxl, &pnrate, &pndist, &pnsse, &pnskip); + *rate += pnrate; + *distortion += pndist; + *sse += pnsse; + *skippable &= pnskip; + + block += step; + } + } + } + + this_rd = VPXMIN(RDCOST(x->rdmult, x->rddiv, *rate, *distortion), + RDCOST(x->rdmult, x->rddiv, 0, *sse)); + if (this_rd > ref_best_rd) + is_cost_valid = 0; + + if (!is_cost_valid) { + // reset cost value + *rate = INT_MAX; + *distortion = INT64_MAX; + *sse = INT64_MAX; + *skippable = 0; + } +} + static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int plane, int block, TX_SIZE tx_size, BLOCK_SIZE plane_bsize, @@ -1209,11 +1470,14 @@ static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *dist, int64_t *bsse, int *skip) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int tx_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + (blk_col >> (1 - pd->subsampling_x)); TX_SIZE plane_tx_size = plane ? - get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], plane_bsize, 0, 0) : + get_uv_tx_size_impl(mbmi->inter_tx_size[tx_idx], bsize, + 0, 0) : mbmi->inter_tx_size[tx_idx]; int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; @@ -1228,32 +1492,9 @@ static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, return; if (tx_size == plane_tx_size) { - const int ss_txfrm_size = tx_size << 1; - const struct macroblock_plane *const p = &x->plane[plane]; - int64_t this_sse; - int shift = tx_size == TX_32X32 ? 0 : 2; - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + int coeff_ctx, i; ENTROPY_CONTEXT *ta = above_ctx + blk_col; - ENTROPY_CONTEXT *tl = left_ctx + blk_row; - PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV; - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - const scan_order *const scan_order = - get_scan(tx_size, tx_type, is_inter_block(&xd->mi[0]->mbmi)); - int i; - - vp10_xform_quant(x, plane, block, blk_row, blk_col, - plane_bsize, tx_size); - -#if CONFIG_VP9_HIGHBITDEPTH - *dist += vp10_highbd_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, - &this_sse, xd->bd) >> shift; -#else - *dist += vp10_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, - &this_sse) >> shift; -#endif // CONFIG_VP9_HIGHBITDEPTH - *bsse += this_sse >> shift; - + ENTROPY_CONTEXT *tl = left_ctx + blk_row; switch (tx_size) { case TX_4X4: break; @@ -1273,17 +1514,14 @@ static void tx_block_rd(const VP10_COMP *cpi, MACROBLOCK *x, assert(0 && "Invalid transform size."); break; } - - *rate += cost_coeffs(x, plane, block, ta, tl, tx_size, - scan_order->scan, scan_order->neighbors, 0); - + coeff_ctx = combine_entropy_contexts(ta[0], tl[0]); + tx_block_rd_b(x, tx_size, blk_row, blk_col, plane, block, + plane_bsize, coeff_ctx, rate, dist, bsse, skip); for (i = 0; i < (1 << tx_size); ++i) { - ta[i] = ta[0]; - tl[i] = tl[0]; + ta[i] = !(p->eobs[block] == 0); + tl[i] = !(p->eobs[block] == 0); } - *skip &= (p->eobs[block] == 0); } else { - BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int bsl = b_width_log2_lookup[bsize]; int step = 1 << (2 * (tx_size - 1)); int i; @@ -1639,8 +1877,14 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, for (idx = 0; idx < width / 4; ++idx) { int64_t ssz, rd, rd1, rd2; tran_low_t* coeff; - +#if CONFIG_VAR_TX + int coeff_ctx; +#endif k += (idy * 2 + idx); +#if CONFIG_VAR_TX + coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), + *(tl + (k >> 1))); +#endif coeff = BLOCK_OFFSET(p->coeff, k); fwd_txm4x4(vp10_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff), coeff, 8); @@ -1659,9 +1903,19 @@ static int64_t encode_inter_mb_segment(VP10_COMP *cpi, 16, &ssz); #endif // CONFIG_VP9_HIGHBITDEPTH thissse += ssz; - thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), TX_4X4, +#if CONFIG_VAR_TX + thisrate += cost_coeffs(x, 0, k, coeff_ctx, + TX_4X4, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); + *(ta + (k & 1)) = !(p->eobs[k] == 0); + *(tl + (k >> 1)) = !(p->eobs[k] == 0); +#else + thisrate += cost_coeffs(x, 0, k, ta + (k & 1), tl + (k >> 1), + TX_4X4, + so->scan, so->neighbors, + cpi->sf.use_fast_coef_costing); +#endif rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2); rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2); rd = VPXMIN(rd1, rd2); @@ -2959,13 +3213,19 @@ static int64_t handle_inter_mode(VP10_COMP *cpi, MACROBLOCK *x, // Y cost and distortion vp10_subtract_plane(x, bsize, 0); - +#if CONFIG_VAR_TX + if (cm->tx_mode == TX_MODE_SELECT) { + inter_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, + bsize, ref_best_rd); + } else { + super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, + bsize, ref_best_rd); + for (i = 0; i < 64; ++i) + mbmi->inter_tx_size[i] = mbmi->tx_size; + } +#else super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, ref_best_rd); - -#if CONFIG_VAR_TX - for (i = 0; i < 64; ++i) - mbmi->inter_tx_size[i] = mbmi->tx_size; #endif if (*rate_y == INT_MAX) { diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index d20196c02..3ea986a3f 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -622,10 +622,11 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t, MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int blk_idx = ((blk_row >> 1) << 3) + (blk_col >> 1); + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int blk_idx = (blk_row >> (1 - pd->subsampling_y)) * 8 + + (blk_col >> (1 - pd->subsampling_x)); TX_SIZE plane_tx_size = plane ? - get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], mbmi->sb_type, - pd->subsampling_x, pd->subsampling_y) : + get_uv_tx_size_impl(mbmi->inter_tx_size[blk_idx], bsize, 0, 0) : mbmi->inter_tx_size[blk_idx]; int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize]; @@ -647,7 +648,6 @@ void tokenize_tx(ThreadData *td, TOKENEXTRA **t, set_entropy_context_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); } else { - const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; int bsl = b_width_log2_lookup[bsize]; int i; -- 2.50.1