From 7e0d0de2113fcfa255803814f5c8872ddf490f65 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Tue, 7 Jul 2015 15:32:27 -0700 Subject: [PATCH] Refactor inverse_transform_block argument list Replace block index with transform type in the argument list. This allows to save an extra fetch to the prediction mode. For pedestrian area 1080p coded at 5 Mbps with single tile, the average decoding speed goes up from 80.55 fps (before the refactoring series) to 81.13 fps. Change-Id: Icbebf84ce63c19c0c92f3690ed201f6c3eab7881 --- vp9/decoder/vp9_decodeframe.c | 131 ++++++++++++++++++++++++++-------- 1 file changed, 102 insertions(+), 29 deletions(-) diff --git a/vp9/decoder/vp9_decodeframe.c b/vp9/decoder/vp9_decodeframe.c index 699f18794..6d2a92b1e 100644 --- a/vp9/decoder/vp9_decodeframe.c +++ b/vp9/decoder/vp9_decodeframe.c @@ -182,35 +182,119 @@ static void read_mv_probs(nmv_context *ctx, int allow_hp, vp9_reader *r) { } } -static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, - TX_SIZE tx_size, uint8_t *dst, int stride, - int eob) { +static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + if (eob > 0) { + tran_low_t *const dqcoeff = pd->dqcoeff; +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + } else { + switch (tx_size) { + case TX_4X4: + vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); + } + } + } else { + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + } + } +} + +static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, + const TX_TYPE tx_type, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; if (eob > 0) { - TX_TYPE tx_type = DCT_DCT; tran_low_t *const dqcoeff = pd->dqcoeff; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { if (xd->lossless) { - tx_type = DCT_DCT; vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); } else { - const PLANE_TYPE plane_type = pd->plane_type; switch (tx_size) { case TX_4X4: - tx_type = get_tx_type_4x4(plane_type, xd, block); vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); break; case TX_8X8: - tx_type = get_tx_type(plane_type, xd); vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); break; case TX_16X16: - tx_type = get_tx_type(plane_type, xd); vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); break; case TX_32X32: - tx_type = DCT_DCT; vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); break; default: @@ -219,25 +303,19 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, } } else { if (xd->lossless) { - tx_type = DCT_DCT; vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { - const PLANE_TYPE plane_type = pd->plane_type; switch (tx_size) { case TX_4X4: - tx_type = get_tx_type_4x4(plane_type, xd, block); vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_8X8: - tx_type = get_tx_type(plane_type, xd); vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: - tx_type = get_tx_type(plane_type, xd); vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: - tx_type = DCT_DCT; vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: @@ -248,25 +326,19 @@ static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, } #else if (xd->lossless) { - tx_type = DCT_DCT; vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { - const PLANE_TYPE plane_type = pd->plane_type; switch (tx_size) { case TX_4X4: - tx_type = get_tx_type_4x4(plane_type, xd, block); vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_8X8: - tx_type = get_tx_type(plane_type, xd); vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: - tx_type = get_tx_type(plane_type, xd); vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: - tx_type = DCT_DCT; vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: @@ -315,14 +387,15 @@ static void predict_and_reconstruct_intra_block(int plane, int block, x, y, plane); if (!mi->mbmi.skip) { + const TX_TYPE tx_type = (plane || xd->lossless) ? + DCT_DCT : intra_mode_to_tx_type_lookup[mode]; const scan_order *sc = (plane || xd->lossless) ? - &vp9_default_scan_orders[tx_size] : - &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; + &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; const int eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize, x, y, tx_size, args->r, args->seg_id); - inverse_transform_block(xd, plane, block, tx_size, dst, pd->dst.stride, - eob); + inverse_transform_block_intra(xd, plane, tx_type, tx_size, + dst, pd->dst.stride, eob); } } @@ -344,9 +417,9 @@ static void reconstruct_inter_block(int plane, int block, txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &x, &y); eob = vp9_decode_block_tokens(xd, plane, sc, plane_bsize, x, y, tx_size, args->r, args->seg_id); - inverse_transform_block(xd, plane, block, tx_size, - &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], - pd->dst.stride, eob); + inverse_transform_block_inter(xd, plane, tx_size, + &pd->dst.buf[4 * y * pd->dst.stride + 4 * x], + pd->dst.stride, eob); *args->eobtotal += eob; } -- 2.40.0