From caeb10bf06f758d317b2e76d274941e79414784e Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Thu, 22 Oct 2015 17:25:00 -0700 Subject: [PATCH] Use explicit block position in foreach_transformed_block Add the row and column index to the argument list of unit functions called by foreach_transformed_block wrapper. This avoids the repeated internal parsing according to the block index. Change-Id: Ie7508acdac0b498487564639bc5cc6378a8a0df7 --- vp10/common/blockd.c | 2 +- vp10/common/blockd.h | 12 +------ vp10/encoder/encodemb.c | 71 ++++++++++++++++++++--------------------- vp10/encoder/encodemb.h | 14 +++++--- vp10/encoder/rdopt.c | 25 ++++++++------- vp10/encoder/tokenize.c | 28 ++++++++-------- 6 files changed, 73 insertions(+), 79 deletions(-) diff --git a/vp10/common/blockd.c b/vp10/common/blockd.c index 5394b5e0e..b6f910ff6 100644 --- a/vp10/common/blockd.c +++ b/vp10/common/blockd.c @@ -66,7 +66,7 @@ void vp10_foreach_transformed_block_in_plane( for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { // Skip visiting the sub blocks that are wholly within the UMV. for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { - visit(plane, i, plane_bsize, tx_size, arg); + visit(plane, i, r, c, plane_bsize, tx_size, arg); i += step; } i += extra_step; diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index 84541545a..b89d79180 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -283,6 +283,7 @@ static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { } typedef void (*foreach_transformed_block_visitor)(int plane, int block, + int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); @@ -296,17 +297,6 @@ void vp10_foreach_transformed_block( const MACROBLOCKD* const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg); -static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, int block, - int *x, int *y) { - const int bwl = b_width_log2_lookup[plane_bsize]; - const int tx_cols_log2 = bwl - tx_size; - const int tx_cols = 1 << tx_cols_log2; - const int raster_mb = block >> (tx_size << 1); - *x = (raster_mb & (tx_cols - 1)) << tx_size; - *y = (raster_mb >> tx_cols_log2) << tx_size; -} - void vp10_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff); diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c index ff23fee27..ec6b34a61 100644 --- a/vp10/encoder/encodemb.c +++ b/vp10/encoder/encodemb.c @@ -324,7 +324,8 @@ static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -336,10 +337,8 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -424,7 +423,8 @@ void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, } void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -433,11 +433,8 @@ void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -658,7 +655,8 @@ static void highbd_fwd_txfm_32x32(int rd_transform, const int16_t *src_diff, #endif // CONFIG_VP9_HIGHBITDEPTH void vp10_xform_quant(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -670,10 +668,8 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; - int i, j; const int16_t *src_diff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -751,7 +747,8 @@ void vp10_xform_quant(MACROBLOCK *x, int plane, int block, } } -static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; MACROBLOCK *const x = args->x; @@ -760,14 +757,12 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; ENTROPY_CONTEXT *a, *l; TX_TYPE tx_type = get_tx_type(pd->plane_type, xd, block); - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; - a = &ctx->ta[plane][i]; - l = &ctx->tl[plane][j]; + dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; + a = &ctx->ta[plane][blk_col]; + l = &ctx->tl[plane][blk_row]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. @@ -786,17 +781,20 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, *a = *l = 0; return; } else { - vp10_xform_quant_fp(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant_fp(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp10_xform_quant(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization - vp10_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant_dc(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); } else { // skip forward transform p->eobs[block] = 0; @@ -804,7 +802,8 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, return; } } else { - vp10_xform_quant(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); } } } @@ -879,19 +878,18 @@ static void encode_block(int plane, int block, BLOCK_SIZE plane_bsize, } } -static void encode_block_pass1(int plane, int block, BLOCK_SIZE plane_bsize, +static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - int i, j; uint8_t *dst; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * j * pd->dst.stride + 4 * i]; + dst = &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col]; - vp10_xform_quant(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH @@ -948,8 +946,9 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) { } } -void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { +void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { struct encode_b_args* const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; @@ -971,15 +970,13 @@ void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; - int i, j; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &i, &j); - dst = &pd->dst.buf[4 * (j * dst_stride + i)]; - src = &p->src.buf[4 * (j * src_stride + i)]; - src_diff = &p->src_diff[4 * (j * diff_stride + i)]; + dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; + src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; + src_diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mbmi->uv_mode; vp10_predict_intra_block(xd, bwl, bhl, tx_size, mode, dst, dst_stride, - dst, dst_stride, i, j, plane); + dst, dst_stride, blk_col, blk_row, plane); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { @@ -1130,5 +1127,5 @@ void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { struct encode_b_args arg = {x, NULL, &xd->mi[0]->mbmi.skip}; vp10_foreach_transformed_block_in_plane(xd, bsize, plane, - vp10_encode_block_intra, &arg); + vp10_encode_block_intra, &arg); } diff --git a/vp10/encoder/encodemb.h b/vp10/encoder/encodemb.h index 62a7db4a2..2e6516e0b 100644 --- a/vp10/encoder/encodemb.h +++ b/vp10/encoder/encodemb.h @@ -26,16 +26,20 @@ struct encode_b_args { void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize); void vp10_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp10_xform_quant_fp(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp10_xform_quant_dc(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp10_xform_quant(MACROBLOCK *x, int plane, int block, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size); + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp10_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); -void vp10_encode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg); +void vp10_encode_block_intra(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg); void vp10_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 2e000af65..cecc59c8f 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -451,18 +451,16 @@ static void dist_block(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, *out_sse = this_sse >> shift; } -static int rate_block(int plane, int block, BLOCK_SIZE plane_bsize, +static int rate_block(int plane, int block, int blk_row, int blk_col, TX_SIZE tx_size, struct rdcost_block_args* args) { - int x_idx, y_idx; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x_idx, &y_idx); - - return cost_coeffs(args->x, plane, block, args->t_above + x_idx, - args->t_left + y_idx, tx_size, + return cost_coeffs(args->x, plane, block, args->t_above + blk_col, + args->t_left + blk_row, tx_size, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); } -static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, +static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; @@ -478,20 +476,23 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, if (!is_inter_block(mbmi)) { struct encode_b_args arg = {x, NULL, &mbmi->skip}; - vp10_encode_block_intra(plane, block, plane_bsize, tx_size, &arg); + vp10_encode_block_intra(plane, block, blk_row, blk_col, + plane_bsize, tx_size, &arg); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (max_txsize_lookup[plane_bsize] == tx_size) { if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_NONE) { // full forward transform and quantization - vp10_xform_quant(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] == SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); - vp10_xform_quant_dc(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant_dc(x, plane, block, blk_row, blk_col, + plane_bsize, tx_size); sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (x->plane[plane].eobs[block]) { @@ -515,7 +516,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, } } else { // full forward transform and quantization - vp10_xform_quant(x, plane, block, plane_bsize, tx_size); + vp10_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); dist_block(x, plane, block, tx_size, &dist, &sse); } @@ -525,7 +526,7 @@ static void block_rd_txfm(int plane, int block, BLOCK_SIZE plane_bsize, return; } - rate = rate_block(plane, block, plane_bsize, tx_size, args); + rate = rate_block(plane, block, blk_row, blk_col, tx_size, args); rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c index 2c9998ba0..e568c0ba5 100644 --- a/vp10/encoder/tokenize.c +++ b/vp10/encoder/tokenize.c @@ -443,7 +443,9 @@ struct tokenize_b_args { TOKENEXTRA **tp; }; -static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void set_entropy_context_b(int plane, int block, + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; ThreadData *const td = args->td; @@ -451,10 +453,8 @@ static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); vp10_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, - aoff, loff); + blk_col, blk_row); } static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree, @@ -520,7 +520,8 @@ void vp10_tokenize_palette_sb(struct ThreadData *const td, } } -static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, +static void tokenize_b(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; VP10_COMP *cpi = args->cpi; @@ -553,11 +554,8 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); int16_t token; EXTRABIT extra; - int aoff, loff; - txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); - - pt = get_entropy_context(tx_size, pd->above_context + aoff, - pd->left_context + loff); + pt = get_entropy_context(tx_size, pd->above_context + blk_col, + pd->left_context + blk_row); scan = so->scan; nb = so->neighbors; c = 0; @@ -597,20 +595,22 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, *tp = t; - vp10_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, aoff, loff); + vp10_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, blk_col, blk_row); } struct is_skippable_args { uint16_t *eobs; int *skippable; }; -static void is_skippable(int plane, int block, +static void is_skippable(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; (void)plane; (void)plane_bsize; (void)tx_size; + (void)blk_row; + (void)blk_col; args->skippable[0] &= (!args->eobs[block]); } @@ -624,13 +624,15 @@ int vp10_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { return result; } -static void has_high_freq_coeff(int plane, int block, +static void has_high_freq_coeff(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; int eobs = (tx_size == TX_4X4) ? 3 : 10; (void) plane; (void) plane_bsize; + (void) blk_row; + (void) blk_col; *(args->skippable) |= (args->eobs[block] > eobs); } -- 2.40.0