From 52bb9dd45c3ed1588361aa166b465ff9f3b3fa63 Mon Sep 17 00:00:00 2001
From: Jingning Han
Date: Tue, 6 Oct 2015 17:02:34 -0700
Subject: [PATCH] Make tokenization process support recursive transform block
 coding

This commit makes the transform, quantization, tokenization and
their corresponding inverse operations support recursive transform
block coding process.

Under CONFIG_VAR_TX each plane of an inter block is walked in units of
max_txsize_lookup[plane_bsize]. Three static helpers share one
recursion:

  decode_reconstruct_tx()  decodeframe.c  decode tokens + inverse transform
  encode_block_inter()     encodemb.c     forwards to encode_block()
  tokenize_tx()            tokenize.c     tokenize_b(), or
                                          set_entropy_context_b() on dry run

Each helper returns at once when (blk_row, blk_col) is outside the block
area left after clipping by mb_to_bottom_edge / mb_to_right_edge, acts
on the block when tx_size equals the transform size selected for the
plane, and otherwise recurses into the four quadrants at tx_size - 1,
advancing the block index by 1 << (2 * (tx_size - 1)) per quadrant.

vp10_tokenize_sb_inter() is the new tokenizer entry point; it is called
from encode_superblock() in place of vp10_tokenize_sb().

The zcoeff_blk early-out in encode_block() is commented out until the
rate-distortion loop is synchronized with the recursive transform block
coding.

Change-Id: I71f2ef3a7c2d3db7cfc63c1fd3f1337e8e0360b5
---
Review notes (this section is ignored by git am):
 * tokenize_tx() is static: it has no prototype in tokenize.h and no
   caller outside tokenize.c in this patch.
 * NOTE(review): the leaf branch of tokenize_tx() re-declares pd and
   plane_bsize, deriving plane_bsize from mbmi->sb_type and shadowing the
   argument, which vp10_tokenize_sb_inter() derives from its bsize
   argument (VPXMAX(bsize, BLOCK_8X8) at the call site). Left untouched;
   please confirm this is intended for sub-8x8 blocks.
 * NOTE(review): the zcoeff_blk change in encode_block() is not guarded
   by CONFIG_VAR_TX, so it also affects builds with CONFIG_VAR_TX off.
 * The per-plane step local in vp10_encode_sb() and
   vp10_tokenize_sb_inter() is named bw (it is read from
   num_4x4_blocks_wide_lookup), matching decode_block().
 * Leading whitespace in this file was reconstructed; use
   "git apply --ignore-whitespace" if context lines do not match.

 vp10/decoder/decodeframe.c |  80 ++++++++++++++++++++++++++--
 vp10/encoder/encodeframe.c |   5 ++
 vp10/encoder/encodemb.c    |  90 ++++++++++++++++++++++++++++---
 vp10/encoder/tokenize.c    | 106 +++++++++++++++++++++++++++++++++++++
 vp10/encoder/tokenize.h    |   6 +++
 5 files changed, 277 insertions(+), 10 deletions(-)

diff --git a/vp10/decoder/decodeframe.c b/vp10/decoder/decodeframe.c
index fa431e049..0e15ad624 100644
--- a/vp10/decoder/decodeframe.c
+++ b/vp10/decoder/decodeframe.c
@@ -383,6 +383,61 @@ static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd,
   }
 }
 
+#if CONFIG_VAR_TX
+static void decode_reconstruct_tx(MACROBLOCKD *const xd, vpx_reader *r,
+                                  MB_MODE_INFO *const mbmi,
+                                  int plane, BLOCK_SIZE plane_bsize,
+                                  int block, int blk_row, int blk_col,
+                                  TX_SIZE tx_size, int *eob_total) {
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    PLANE_TYPE plane_type = (plane == 0) ? PLANE_TYPE_Y : PLANE_TYPE_UV;
+    TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size);
+    const scan_order *sc = get_scan(tx_size, tx_type, 1);
+    const int eob = vp10_decode_block_tokens(xd, plane, sc,
+                                             blk_col, blk_row, tx_size,
+                                             r, mbmi->segment_id);
+    inverse_transform_block_inter(xd, plane, tx_size,
+        &pd->dst.buf[4 * blk_row * pd->dst.stride + 4 * blk_col],
+        pd->dst.stride, eob, block);
+    *eob_total += eob;
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize, block + i * step,
+                            offsetr, offsetc, tx_size - 1, eob_total);
+    }
+  }
+}
+#endif
+
 static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r,
                                    MB_MODE_INFO *const mbmi, int plane,
                                    int row, int col, TX_SIZE tx_size) {
@@ -872,13 +927,31 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
 
       for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
         const struct macroblockd_plane *const pd = &xd->plane[plane];
+        const int num_4x4_w = pd->n4_w;
+        const int num_4x4_h = pd->n4_h;
+        int row, col;
+#if CONFIG_VAR_TX
+        // TODO(jingning): This can be simplified for decoder performance.
+        const BLOCK_SIZE plane_bsize =
+            get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), pd);
+        const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+        const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+        int bw = num_4x4_blocks_wide_lookup[txb_size];
+        int block = 0;
+        const int step = 1 << (max_tx_size << 1);
+
+        for (row = 0; row < num_4x4_h; row += bw) {
+          for (col = 0; col < num_4x4_w; col += bw) {
+            decode_reconstruct_tx(xd, r, mbmi, plane, plane_bsize,
+                                  block, row, col, max_tx_size, &eobtotal);
+            block += step;
+          }
+        }
+#else
         const TX_SIZE tx_size =
             plane ? dec_get_uv_tx_size(mbmi, pd->n4_wl, pd->n4_hl)
                     : mbmi->tx_size;
-        const int num_4x4_w = pd->n4_w;
-        const int num_4x4_h = pd->n4_h;
         const int step = (1 << tx_size);
-        int row, col;
         const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ?
             0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
         const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ?
@@ -888,6 +961,7 @@ static void decode_block(VP10Decoder *const pbi, MACROBLOCKD *const xd,
           for (col = 0; col < max_blocks_wide; col += step)
             eobtotal += reconstruct_inter_block(xd, r, mbmi, plane, row, col,
                                                 tx_size);
+#endif
       }
 
       if (!less8x8 && eobtotal == 0)
diff --git a/vp10/encoder/encodeframe.c b/vp10/encoder/encodeframe.c
index 0f823e9e1..dbe1ec682 100644
--- a/vp10/encoder/encodeframe.c
+++ b/vp10/encoder/encodeframe.c
@@ -2962,7 +2962,12 @@ static void encode_superblock(VP10_COMP *cpi, ThreadData *td,
                                      VPXMAX(bsize, BLOCK_8X8));
 
     vp10_encode_sb(x, VPXMAX(bsize, BLOCK_8X8));
+#if CONFIG_VAR_TX
+    vp10_tokenize_sb_inter(cpi, td, t, !output_enabled,
+                           mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8));
+#else
     vp10_tokenize_sb(cpi, td, t, !output_enabled, VPXMAX(bsize, BLOCK_8X8));
+#endif
   }
 
   if (output_enabled) {
diff --git a/vp10/encoder/encodemb.c b/vp10/encoder/encodemb.c
index 724e13c25..437870ea5 100644
--- a/vp10/encoder/encodemb.c
+++ b/vp10/encoder/encodemb.c
@@ -1376,11 +1376,13 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
 
   // TODO(jingning): per transformed block zero forcing only enabled for
   // luma component. will integrate chroma components as well.
-  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
-    p->eobs[block] = 0;
-    *a = *l = 0;
-    return;
-  }
+  // Turn this back on when the rate-distortion loop is synchronized with
+  // the recursive transform block coding.
+//  if (x->zcoeff_blk[tx_size][block] && plane == 0) {
+//    p->eobs[block] = 0;
+//    *a = *l = 0;
+//    return;
+//  }
 
   if (!x->skip_recode) {
     if (x->quant_fp) {
@@ -1488,6 +1490,57 @@ static void encode_block(int plane, int block, int blk_row, int blk_col,
   }
 }
 
+#if CONFIG_VAR_TX
+static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
+                               BLOCK_SIZE plane_bsize, TX_SIZE tx_size,
+                               void *arg) {
+  struct encode_b_args *const args = arg;
+  MACROBLOCK *const x = args->x;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    encode_block(plane, block, blk_row, blk_col, plane_bsize,
+                 tx_size, arg);
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      encode_block_inter(plane, block + i * step, offsetr, offsetc,
+                         plane_bsize, tx_size - 1, arg);
+    }
+  }
+}
+#endif
+
 static void encode_block_pass1(int plane, int block, int blk_row, int blk_col,
                                BLOCK_SIZE plane_bsize,
                                TX_SIZE tx_size, void *arg) {
@@ -1541,6 +1594,19 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
     return;
 
   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+#if CONFIG_VAR_TX
+    // TODO(jingning): Clean this up.
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+    int bw = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_tx_size * 2);
+#endif
     if (!x->skip_recode)
       vp10_subtract_plane(x, bsize, plane);
 
@@ -1548,11 +1614,21 @@ void vp10_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize) {
       const struct macroblockd_plane* const pd = &xd->plane[plane];
       const TX_SIZE tx_size = plane ? get_uv_tx_size(mbmi, pd) : mbmi->tx_size;
       vp10_get_entropy_contexts(bsize, tx_size, pd,
-                               ctx.ta[plane], ctx.tl[plane]);
+                                ctx.ta[plane], ctx.tl[plane]);
     }
 
+#if CONFIG_VAR_TX
+    for (idy = 0; idy < mi_height; idy += bw) {
+      for (idx = 0; idx < mi_width; idx += bw) {
+        encode_block_inter(plane, block, idy, idx, plane_bsize,
+                           max_tx_size, &arg);
+        block += step;
+      }
+    }
+#else
     vp10_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block,
-                                           &arg);
+                                            &arg);
+#endif
   }
 }
 
diff --git a/vp10/encoder/tokenize.c b/vp10/encoder/tokenize.c
index c6a16a03e..f65e17fc2 100644
--- a/vp10/encoder/tokenize.c
+++ b/vp10/encoder/tokenize.c
@@ -613,6 +613,112 @@ int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
   return result;
 }
 
+#if CONFIG_VAR_TX
+static void tokenize_tx(ThreadData *td, TOKENEXTRA **t,
+                        int dry_run, TX_SIZE tx_size, BLOCK_SIZE plane_bsize,
+                        int blk_row, int blk_col, int block, int plane,
+                        void *arg) {
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  TX_SIZE plane_tx_size = plane ?
+      get_uv_tx_size_impl(mbmi->tx_size, mbmi->sb_type,
+                          pd->subsampling_x, pd->subsampling_y) : mbmi->tx_size;
+
+  int max_blocks_high = num_4x4_blocks_high_lookup[plane_bsize];
+  int max_blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
+  if (xd->mb_to_bottom_edge < 0)
+    max_blocks_high += xd->mb_to_bottom_edge >> (5 + pd->subsampling_y);
+  if (xd->mb_to_right_edge < 0)
+    max_blocks_wide += xd->mb_to_right_edge >> (5 + pd->subsampling_x);
+
+  if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide)
+    return;
+
+  if (tx_size == plane_tx_size) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, pd);
+    if (!dry_run)
+      tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg);
+    else
+      set_entropy_context_b(plane, block, blk_row, blk_col,
+                            plane_bsize, tx_size, arg);
+  } else {
+    const BLOCK_SIZE bsize = txsize_to_bsize[tx_size];
+    int bsl = b_width_log2_lookup[bsize];
+    int i;
+
+    assert(bsl > 0);
+    --bsl;
+
+    for (i = 0; i < 4; ++i) {
+      const int offsetr = blk_row + ((i >> 1) << bsl);
+      const int offsetc = blk_col + ((i & 0x01) << bsl);
+      int step = 1 << (2 * (tx_size - 1));
+
+      if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide)
+        continue;
+
+      tokenize_tx(td, t, dry_run, tx_size - 1, plane_bsize,
+                  offsetr, offsetc, block + i * step, plane, arg);
+    }
+  }
+}
+
+void vp10_tokenize_sb_inter(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
+                            int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize) {
+  VP10_COMMON *const cm = &cpi->common;
+  MACROBLOCK *const x = &td->mb;
+  MACROBLOCKD *const xd = &x->e_mbd;
+  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
+  TOKENEXTRA *t_backup = *t;
+  const int ctx = vp10_get_skip_context(xd);
+  const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id,
+                                          SEG_LVL_SKIP);
+  struct tokenize_b_args arg = {cpi, td, t};
+  int plane;
+  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
+    return;
+
+  if (mbmi->skip) {
+    if (!dry_run)
+      td->counts->skip[ctx][1] += skip_inc;
+    reset_skip_context(xd, bsize);
+    if (dry_run)
+      *t = t_backup;
+    return;
+  }
+
+  if (!dry_run)
+    td->counts->skip[ctx][0] += skip_inc;
+  else
+    *t = t_backup;
+
+  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
+    const int mi_width = num_4x4_blocks_wide_lookup[plane_bsize];
+    const int mi_height = num_4x4_blocks_high_lookup[plane_bsize];
+    const TX_SIZE max_tx_size = max_txsize_lookup[plane_bsize];
+    const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size];
+    int bw = num_4x4_blocks_wide_lookup[txb_size];
+    int idx, idy;
+    int block = 0;
+    int step = 1 << (max_tx_size * 2);
+
+    for (idy = 0; idy < mi_height; idy += bw) {
+      for (idx = 0; idx < mi_width; idx += bw) {
+        tokenize_tx(td, t, dry_run, max_tx_size, plane_bsize, idy, idx,
+                    block, plane, &arg);
+        block += step;
+      }
+    }
+  }
+}
+#endif
+
 void vp10_tokenize_sb(VP10_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                       int dry_run, BLOCK_SIZE bsize) {
   VP10_COMMON *const cm = &cpi->common;
diff --git a/vp10/encoder/tokenize.h b/vp10/encoder/tokenize.h
index 5bad415a9..00d399c44 100644
--- a/vp10/encoder/tokenize.h
+++ b/vp10/encoder/tokenize.h
@@ -51,6 +51,12 @@ int vp10_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane);
 struct VP10_COMP;
 struct ThreadData;
 
+#if CONFIG_VAR_TX
+void vp10_tokenize_sb_inter(struct VP10_COMP *cpi, struct ThreadData *td,
+                            TOKENEXTRA **t, int dry_run, int mi_row, int mi_col,
+                            BLOCK_SIZE bsize);
+#endif
+
 void vp10_tokenize_sb(struct VP10_COMP *cpi, struct ThreadData *td,
                       TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize);
 
-- 
2.40.0