From: John Koleszar Date: Tue, 2 Apr 2013 21:50:40 +0000 (-0700) Subject: Move qcoeff, dqcoeff from BLOCKD to per-plane data X-Git-Tag: v1.3.0~1106^2~303^2~1 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4c05a051ab574368e552866e28373f8eb03a6f80;p=libvpx Move qcoeff, dqcoeff from BLOCKD to per-plane data Start grouping data per-plane, as part of refactoring to support additional planes, and chroma planes with other-than 4:2:0 subsampling. Change-Id: Idb76a0e23ab239180c818025bae1f36f1608bb23 --- diff --git a/vp9/common/vp9_blockd.h b/vp9/common/vp9_blockd.h index 016244b0c..6fdc021f2 100644 --- a/vp9/common/vp9_blockd.h +++ b/vp9/common/vp9_blockd.h @@ -260,8 +260,6 @@ typedef struct { } MODE_INFO; typedef struct blockd { - int16_t *qcoeff; - int16_t *dqcoeff; uint8_t *predictor; int16_t *diff; int16_t *dequant; @@ -295,15 +293,28 @@ struct scale_factors { #endif }; +enum { MAX_MB_PLANE = 3 }; + +struct mb_plane { + DECLARE_ALIGNED(16, int16_t, qcoeff[64 * 64]); + DECLARE_ALIGNED(16, int16_t, dqcoeff[64 * 64]); +}; + +#define BLOCK_OFFSET(x, i, n) ((x) + (i) * (n)) + +#define MB_SUBBLOCK_FIELD(x, field, i) (\ + ((i) < 16) ? BLOCK_OFFSET((x)->plane[0].field, (i), 16) : \ + ((i) < 20) ? BLOCK_OFFSET((x)->plane[1].field, ((i) - 16), 16) : \ + BLOCK_OFFSET((x)->plane[2].field, ((i) - 20), 16)) + typedef struct macroblockd { DECLARE_ALIGNED(16, int16_t, diff[64*64+32*32*2]); /* from idct diff */ DECLARE_ALIGNED(16, uint8_t, predictor[384]); // unused for superblocks - DECLARE_ALIGNED(16, int16_t, qcoeff[64*64+32*32*2]); - DECLARE_ALIGNED(16, int16_t, dqcoeff[64*64+32*32*2]); DECLARE_ALIGNED(16, uint16_t, eobs[256+64*2]); #if CONFIG_CODE_NONZEROCOUNT DECLARE_ALIGNED(16, uint16_t, nzcs[256+64*2]); #endif + struct mb_plane plane[MAX_MB_PLANE]; /* 16 Y blocks, 4 U, 4 V, each with 16 entries. */ BLOCKD block[24]; @@ -384,8 +395,8 @@ typedef struct macroblockd { void (*itxm_add_y_block)(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd); void (*itxm_add_uv_block)(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dst_u, uint8_t *dst_v, int stride, - struct macroblockd *xd); + uint8_t *pre, uint8_t *dst, int stride, + uint16_t *eobs); struct subpix_fn_table subpix; @@ -681,4 +692,34 @@ static int get_nzc_used(TX_SIZE tx_size) { return (tx_size >= TX_16X16); } #endif + +struct plane_block_idx { + int plane; + int block; +}; + +// TODO(jkoleszar): returning a struct so it can be used in a const context, +// expect to refactor this further later. +static INLINE struct plane_block_idx plane_block_idx(MACROBLOCKD *xd, + int b_idx) { + const BLOCK_SIZE_TYPE sb_type = xd->mode_info_context->mbmi.sb_type; + const int u_offset = 16 << (sb_type * 2); + const int v_offset = 20 << (sb_type * 2); + struct plane_block_idx res; + + if (b_idx < u_offset) { + res.plane = 0; + res.block = b_idx; + } else if (b_idx < v_offset) { + res.plane = 1; + res.block = b_idx - u_offset; + } else { + assert(b_idx < (24 << (sb_type * 2))); + res.plane = 2; + res.block = b_idx - v_offset; + } + return res; +} + + #endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/vp9/common/vp9_invtrans.c b/vp9/common/vp9_invtrans.c index a03a66e33..3c3273396 100644 --- a/vp9/common/vp9_invtrans.c +++ b/vp9/common/vp9_invtrans.c @@ -26,9 +26,11 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { for (i = 0; i < 16; i++) { TX_TYPE tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht4x4(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), xd->block[i].diff, 32); } } @@ -37,8 +39,14 @@ void vp9_inverse_transform_mby_4x4(MACROBLOCKD *xd) { void vp9_inverse_transform_mbuv_4x4(MACROBLOCKD *xd) { int i; - for (i = 16; i < 24; i++) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[i], xd->block[i].dqcoeff, + for (i = 16; i < 20; i++) { + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], + BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + xd->block[i].diff, 16); + } + for (i = 20; i < 24; i++) { + vp9_inverse_transform_b_4x4(xd, xd->eobs[i], + BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), xd->block[i].diff, 16); } } @@ -60,19 +68,20 @@ void vp9_inverse_transform_mby_8x8(MACROBLOCKD *xd) { for (i = 0; i < 9; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i].dqcoeff, xd->block[i].diff, 16, tx_type); + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), &blockd[i].diff[0], 32); } } for (i = 2; i < 11; i += 8) { TX_TYPE tx_type = get_tx_type_8x8(xd, i); if (tx_type != DCT_DCT) { - vp9_short_iht8x8(xd->block[i + 2].dqcoeff, xd->block[i].diff, - 16, tx_type); + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16), + xd->block[i].diff, 16, tx_type); } else { - vp9_inverse_transform_b_8x8(&blockd[i + 2].dqcoeff[0], + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, i + 2, 16), &blockd[i].diff[0], 32); } } @@ -82,8 +91,12 @@ void vp9_inverse_transform_mbuv_8x8(MACROBLOCKD *xd) { int i; BLOCKD *blockd = xd->block; - for (i = 16; i < 24; i += 4) { - vp9_inverse_transform_b_8x8(&blockd[i].dqcoeff[0], + for (i = 16; i < 20; i += 4) { + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + &blockd[i].diff[0], 16); + } + for (i = 20; i < 24; i += 4) { + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), &blockd[i].diff[0], 16); } } @@ -102,9 +115,10 @@ void vp9_inverse_transform_mby_16x16(MACROBLOCKD *xd) { BLOCKD *bd = &xd->block[0]; TX_TYPE tx_type = get_tx_type_16x16(xd, 0); if (tx_type != DCT_DCT) { - vp9_short_iht16x16(bd->dqcoeff, bd->diff, 16, tx_type); + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), + bd->diff, 16, tx_type); } else { - vp9_inverse_transform_b_16x16(&xd->block[0].dqcoeff[0], + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), &xd->block[0].diff[0], 32); } } @@ -115,7 +129,7 @@ void vp9_inverse_transform_mb_16x16(MACROBLOCKD *xd) { } void vp9_inverse_transform_sby_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff, xd->diff, 64); + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, 0, 16), xd->diff, 64); } void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { @@ -126,11 +140,11 @@ void vp9_inverse_transform_sby_16x16(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 8 + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 32 * 16, 64); } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 32 * 16, 32, tx_type); } } @@ -144,10 +158,10 @@ void vp9_inverse_transform_sby_8x8(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 8 + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 32 * 8, 64); } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 32 * 8, 32, tx_type); } } @@ -161,19 +175,20 @@ void vp9_inverse_transform_sby_4x4(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 8 + x_idx); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], + BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 32, 64); } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 32, 32, tx_type); } } } void vp9_inverse_transform_sbuv_16x16(MACROBLOCKD *xd) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1024, + vp9_inverse_transform_b_16x16(xd->plane[1].dqcoeff, xd->diff + 1024, 32); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 1280, + vp9_inverse_transform_b_16x16(xd->plane[2].dqcoeff, xd->diff + 1280, 32); } @@ -183,10 +198,10 @@ void vp9_inverse_transform_sbuv_8x8(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1024 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), xd->diff + 1024 + x_idx * 8 + y_idx * 16 * 8, 32); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 1280 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), xd->diff + 1280 + x_idx * 8 + y_idx * 16 * 8, 32); } @@ -199,11 +214,11 @@ void vp9_inverse_transform_sbuv_4x4(MACROBLOCKD *xd) { const int x_idx = n & 3, y_idx = n >> 2; vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + n], - xd->dqcoeff + 1024 + n * 16, + BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), xd->diff + 1024 + x_idx * 4 + y_idx * 16 * 4, 32); vp9_inverse_transform_b_4x4(xd, xd->eobs[64 + 16 + n], - xd->dqcoeff + 1280 + n * 16, + BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), xd->diff + 1280 + x_idx * 4 + y_idx * 16 * 4, 32); } @@ -215,7 +230,7 @@ void vp9_inverse_transform_sb64y_32x32(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; - vp9_short_idct32x32(xd->dqcoeff + n * 1024, + vp9_short_idct32x32(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 1024), xd->diff + x_idx * 32 + y_idx * 32 * 64, 128); } } @@ -228,11 +243,11 @@ void vp9_inverse_transform_sb64y_16x16(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_16x16(xd, (y_idx * 16 + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_16x16(xd->dqcoeff + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 64 * 16, 128); } else { - vp9_short_iht16x16(xd->dqcoeff + n * 256, + vp9_short_iht16x16(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 256), xd->diff + x_idx * 16 + y_idx * 64 * 16, 64, tx_type); } } @@ -246,10 +261,10 @@ void vp9_inverse_transform_sb64y_8x8(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * 16 + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_8x8(xd->dqcoeff + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 64 * 8, 128); } else { - vp9_short_iht8x8(xd->dqcoeff + n * 64, + vp9_short_iht8x8(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 64), xd->diff + x_idx * 8 + y_idx * 64 * 8, 64, tx_type); } } @@ -263,19 +278,20 @@ void vp9_inverse_transform_sb64y_4x4(MACROBLOCKD *xd) { const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * 16 + x_idx); if (tx_type == DCT_DCT) { - vp9_inverse_transform_b_4x4(xd, xd->eobs[n], xd->dqcoeff + n * 16, + vp9_inverse_transform_b_4x4(xd, xd->eobs[n], + BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 64, 128); } else { - vp9_short_iht4x4(xd->dqcoeff + n * 16, + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, n, 16), xd->diff + x_idx * 4 + y_idx * 4 * 64, 64, tx_type); } } } void vp9_inverse_transform_sb64uv_32x32(MACROBLOCKD *xd) { - vp9_short_idct32x32(xd->dqcoeff + 4096, + vp9_short_idct32x32(xd->plane[1].dqcoeff, xd->diff + 4096, 64); - vp9_short_idct32x32(xd->dqcoeff + 4096 + 1024, + vp9_short_idct32x32(xd->plane[2].dqcoeff, xd->diff + 4096 + 1024, 64); } @@ -285,9 +301,9 @@ void vp9_inverse_transform_sb64uv_16x16(MACROBLOCKD *xd) { for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1, off = x_idx * 16 + y_idx * 32 * 16; - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 256), xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_16x16(xd->dqcoeff + 4096 + 1024 + n * 256, + vp9_inverse_transform_b_16x16(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 256), xd->diff + 4096 + 1024 + off, 64); } } @@ -298,9 +314,9 @@ void vp9_inverse_transform_sb64uv_8x8(MACROBLOCKD *xd) { for (n = 0; n < 16; n++) { const int x_idx = n & 3, y_idx = n >> 2, off = x_idx * 8 + y_idx * 32 * 8; - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 64), xd->diff + 4096 + off, 64); - vp9_inverse_transform_b_8x8(xd->dqcoeff + 4096 + 1024 + n * 64, + vp9_inverse_transform_b_8x8(BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 64), xd->diff + 4096 + 1024 + off, 64); } } @@ -312,10 +328,10 @@ void vp9_inverse_transform_sb64uv_4x4(MACROBLOCKD *xd) { const int x_idx = n & 7, y_idx = n >> 3, off = x_idx * 4 + y_idx * 32 * 4; vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + n], - xd->dqcoeff + 4096 + n * 16, + BLOCK_OFFSET(xd->plane[1].dqcoeff, n, 16), xd->diff + 4096 + off, 64); vp9_inverse_transform_b_4x4(xd, xd->eobs[256 + 64 + n], - xd->dqcoeff + 4096 + 1024 + n * 16, + BLOCK_OFFSET(xd->plane[2].dqcoeff, n, 16), xd->diff + 4096 + 1024 + off, 64); } } diff --git a/vp9/common/vp9_mbpitch.c b/vp9/common/vp9_mbpitch.c index 85ba82dd3..b357c9ac9 100644 --- a/vp9/common/vp9_mbpitch.c +++ b/vp9/common/vp9_mbpitch.c @@ -99,11 +99,6 @@ void vp9_setup_block_dptrs(MACROBLOCKD *mb) { blockd[to].predictor = &mb->predictor[from]; } } - - for (r = 0; r < 24; r++) { - blockd[r].qcoeff = &mb->qcoeff[r * 16]; - blockd[r].dqcoeff = &mb->dqcoeff[r * 16]; - } } void vp9_build_block_doffsets(MACROBLOCKD *mb) { diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 8b6efc384..cf95524e0 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -29,9 +29,6 @@ forward_decls vp9_common_forward_decls prototype void vp9_dequant_idct_add_y_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block_8x8 -prototype void vp9_dequant_idct_add_uv_block_8x8 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_8x8 - prototype void vp9_dequant_idct_add_16x16 "int16_t *input, const int16_t *dq, uint8_t *pred, uint8_t *dest, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_16x16 @@ -44,15 +41,12 @@ specialize vp9_dequant_idct_add prototype void vp9_dequant_idct_add_y_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, struct macroblockd *xd" specialize vp9_dequant_idct_add_y_block -prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" +prototype void vp9_dequant_idct_add_uv_block "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int stride, uint16_t *eobs" specialize vp9_dequant_idct_add_uv_block prototype void vp9_dequant_idct_add_32x32 "int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, int pitch, int stride, int eob" specialize vp9_dequant_idct_add_32x32 -prototype void vp9_dequant_idct_add_uv_block_16x16 "int16_t *q, const int16_t *dq, uint8_t *dstu, uint8_t *dstv, int stride, struct macroblockd *xd" -specialize vp9_dequant_idct_add_uv_block_16x16 - # # RECON # @@ -606,8 +600,7 @@ prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" specialize vp9_subtract_b mmx sse2 prototype int vp9_mbuverror "struct macroblock *mb" -specialize vp9_mbuverror mmx sse2 -vp9_mbuverror_sse2=vp9_mbuverror_xmm +specialize vp9_mbuverror prototype void vp9_subtract_b "struct block *be, struct blockd *bd, int pitch" specialize vp9_subtract_b mmx sse2 diff --git a/vp9/decoder/vp9_decodframe.c b/vp9/decoder/vp9_decodframe.c index 7d71ceba3..3cefd8f27 100644 --- a/vp9/decoder/vp9_decodframe.c +++ b/vp9/decoder/vp9_decodframe.c @@ -245,19 +245,23 @@ static void decode_16x16(VP9D_COMP *pbi, MACROBLOCKD *xd, } #endif if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->qcoeff, + vp9_ht_dequant_idct_add_16x16_c(tx_type, xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, 16, xd->dst.y_stride, xd->eobs[0]); } else { - vp9_dequant_idct_add_16x16(xd->qcoeff, xd->block[0].dequant, + vp9_dequant_idct_add_16x16(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, 16, xd->dst.y_stride, xd->eobs[0]); } - vp9_dequant_idct_add_uv_block_8x8( - xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + + vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, 8, + xd->dst.uv_stride, xd->eobs[16]); + + vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, + xd->dst.uv_stride, xd->eobs[20]); } static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, @@ -281,7 +285,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, for (i = 0; i < 4; i++) { int ib = vp9_i8x8_block[i]; int idx = (ib & 0x02) ? (ib + 2) : ib; - int16_t *q = xd->block[idx].qcoeff; + int16_t *q = BLOCK_OFFSET(xd->plane[0].qcoeff, idx, 16); int16_t *dq = xd->block[0].dequant; uint8_t *pre = xd->block[ib].predictor; uint8_t *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; @@ -301,7 +305,7 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, } } } else { - vp9_dequant_idct_add_y_block_8x8(xd->qcoeff, + vp9_dequant_idct_add_y_block_8x8(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, @@ -319,23 +323,31 @@ static void decode_8x8(VP9D_COMP *pbi, MACROBLOCKD *xd, b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->eobs + 16); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 20); } else { - vp9_dequant_idct_add_uv_block_8x8 - (xd->qcoeff + 16 * 16, xd->block[16].dequant, - xd->predictor + 16 * 16, xd->dst.u_buffer, xd->dst.v_buffer, - xd->dst.uv_stride, xd); + vp9_dequant_idct_add_8x8(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, 8, + xd->dst.uv_stride, xd->eobs[16]); + + vp9_dequant_idct_add_8x8(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, 8, + xd->dst.uv_stride, xd->eobs[20]); } #if 0 // def DEC_DEBUG if (dec_debug) { @@ -378,23 +390,27 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, b = &xd->block[ib + iblock[j]]; tx_type = get_tx_type_4x4(xd, ib + iblock[j]); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[ib + iblock[j]]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, ib + iblock[j], 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[ib + iblock[j]]); } } b = &xd->block[16 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[16 + i]); b = &xd->block[20 + i]; vp9_intra_uv4x4_predict(xd, b, i8x8mode, b->predictor); - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride, xd->eobs[20 + i]); } } else if (mode == B_PRED) { @@ -410,12 +426,14 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_intra4x4_predict(xd, b, b_mode, b->predictor); tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } } @@ -424,27 +442,25 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, vp9_decode_mb_tokens_4x4_uv(pbi, xd, bc); #endif vp9_build_intra_predictors_mbuv(xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->eobs + 16); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 20); } else if (mode == SPLITMV || get_tx_type_4x4(xd, 0) == DCT_DCT) { - xd->itxm_add_y_block(xd->qcoeff, + xd->itxm_add_y_block(xd->plane[0].qcoeff, xd->block[0].dequant, xd->predictor, xd->dst.y_buffer, xd->dst.y_stride, xd); - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->eobs + 16); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 20); } else { #if 0 // def DEC_DEBUG if (dec_debug) { @@ -467,22 +483,23 @@ static void decode_4x4(VP9D_COMP *pbi, MACROBLOCKD *xd, BLOCKD *b = &xd->block[i]; tx_type = get_tx_type_4x4(xd, i); if (tx_type != DCT_DCT) { - vp9_ht_dequant_idct_add_c(tx_type, b->qcoeff, + vp9_ht_dequant_idct_add_c(tx_type, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } else { - xd->itxm_add(b->qcoeff, b->dequant, b->predictor, + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + b->dequant, b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride, xd->eobs[i]); } } - xd->itxm_add_uv_block(xd->qcoeff + 16 * 16, - xd->block[16].dequant, - xd->predictor + 16 * 16, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, - xd); + xd->itxm_add_uv_block(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16, xd->dst.u_buffer, + xd->dst.uv_stride, xd->eobs + 16); + xd->itxm_add_uv_block(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->predictor + 16 * 16 + 64, xd->dst.v_buffer, + xd->dst.uv_stride, xd->eobs + 20); } } @@ -491,8 +508,6 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - const int u_qcoeff_offset = (16 * 16) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (16 * 16) * uv_count; const int u_eob_offset = 16 * y_count; const int v_eob_offset = u_eob_offset + 16 * uv_count; int n; @@ -504,7 +519,7 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const TX_TYPE tx_type = get_tx_type_16x16(mb, (y_idx * (4 * y_size) + x_idx) * 4); if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_16x16(mb->qcoeff + n * 16 * 16, + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), mb->block[0].dequant , mb->dst.y_buffer + y_offset, mb->dst.y_buffer + y_offset, @@ -512,7 +527,7 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { mb->eobs[n * 16]); } else { vp9_ht_dequant_idct_add_16x16_c(tx_type, - mb->qcoeff + n * 16 * 16, + BLOCK_OFFSET(mb->plane[0].qcoeff, n, 256), mb->block[0].dequant, mb->dst.y_buffer + y_offset, mb->dst.y_buffer + y_offset, @@ -525,13 +540,13 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; const int uv_offset = (y_idx * 16) * mb->dst.uv_stride + (x_idx * 16); - vp9_dequant_idct_add_16x16(mb->qcoeff + u_qcoeff_offset + n * 16 * 16, + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[1].qcoeff, n, 256), mb->block[16].dequant, mb->dst.u_buffer + uv_offset, mb->dst.u_buffer + uv_offset, mb->dst.uv_stride, mb->dst.uv_stride, mb->eobs[u_eob_offset + n * 16]); - vp9_dequant_idct_add_16x16(mb->qcoeff + v_qcoeff_offset + n * 16 * 16, + vp9_dequant_idct_add_16x16(BLOCK_OFFSET(mb->plane[2].qcoeff, n, 256), mb->block[20].dequant, mb->dst.v_buffer + uv_offset, mb->dst.v_buffer + uv_offset, @@ -540,13 +555,11 @@ static void decode_sb_16x16(MACROBLOCKD *mb, int y_size) { } } -static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) { +static INLINE void decode_sb_8x8(MACROBLOCKD *xd, int y_size) { const int y_count = y_size * y_size; const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - const int u_qcoeff_offset = (8 * 8) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (8 * 8) * uv_count; const int u_eob_offset = 4 * y_count; const int v_eob_offset = u_eob_offset + 4 * uv_count; int n; @@ -555,24 +568,24 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) { for (n = 0; n < y_count; n++) { const int x_idx = n % y_size; const int y_idx = n / y_size; - const int y_offset = (y_idx * 8) * mb->dst.y_stride + (x_idx * 8); - const TX_TYPE tx_type = get_tx_type_8x8(mb, + const int y_offset = (y_idx * 8) * xd->dst.y_stride + (x_idx * 8); + const TX_TYPE tx_type = get_tx_type_8x8(xd, (y_idx * (2 * y_size) + x_idx) * 2); if (tx_type == DCT_DCT) { - vp9_dequant_idct_add_8x8_c(mb->qcoeff + n * 8 * 8, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 4]); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[n * 4]); } else { vp9_ht_dequant_idct_add_8x8_c(tx_type, - mb->qcoeff + n * 8 * 8, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n * 4]); + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 64), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[n * 4]); } } @@ -580,30 +593,28 @@ static void decode_sb_8x8(MACROBLOCKD *mb, int y_size) { for (n = 0; n < uv_count; n++) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; - const int uv_offset = (y_idx * 8) * mb->dst.uv_stride + (x_idx * 8); - vp9_dequant_idct_add_8x8_c(mb->qcoeff + u_qcoeff_offset + n * 8 * 8, - mb->block[16].dequant, - mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[u_eob_offset + n * 4]); - vp9_dequant_idct_add_8x8_c(mb->qcoeff + v_qcoeff_offset + n * 8 * 8, - mb->block[20].dequant, - mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[v_eob_offset + n * 4]); + const int uv_offset = (y_idx * 8) * xd->dst.uv_stride + (x_idx * 8); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 64), + xd->block[16].dequant, + xd->dst.u_buffer + uv_offset, + xd->dst.u_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->eobs[u_eob_offset + n * 4]); + vp9_dequant_idct_add_8x8_c(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 64), + xd->block[20].dequant, + xd->dst.v_buffer + uv_offset, + xd->dst.v_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->eobs[v_eob_offset + n * 4]); } } -static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) { +static void decode_sb_4x4(MACROBLOCKD *xd, int y_size) { const int y_count = y_size * y_size; const int uv_size = y_size / 2; const int uv_count = uv_size * uv_size; - const int u_qcoeff_offset = (4 * 4) * y_count; - const int v_qcoeff_offset = u_qcoeff_offset + (4 * 4) * uv_count; const int u_eob_offset = y_count; const int v_eob_offset = u_eob_offset + uv_count; int n; @@ -611,42 +622,41 @@ static void decode_sb_4x4(MACROBLOCKD *mb, int y_size) { for (n = 0; n < y_count; n++) { const int x_idx = n % y_size; const int y_idx = n / y_size; - const int y_offset = (y_idx * 4) * mb->dst.y_stride + (x_idx * 4); - const TX_TYPE tx_type = get_tx_type_4x4(mb, y_idx * y_size + x_idx); + const int y_offset = (y_idx * 4) * xd->dst.y_stride + (x_idx * 4); + const TX_TYPE tx_type = get_tx_type_4x4(xd, y_idx * y_size + x_idx); if (tx_type == DCT_DCT) { - mb->itxm_add(mb->qcoeff + n * 4 * 4, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, xd->dst.y_stride, + xd->eobs[n]); } else { vp9_ht_dequant_idct_add_c(tx_type, - mb->qcoeff + n * 4 * 4, - mb->block[0].dequant, - mb->dst.y_buffer + y_offset, - mb->dst.y_buffer + y_offset, - mb->dst.y_stride, mb->dst.y_stride, - mb->eobs[n]); + BLOCK_OFFSET(xd->plane[0].qcoeff, n, 16), + xd->block[0].dequant, + xd->dst.y_buffer + y_offset, + xd->dst.y_buffer + y_offset, + xd->dst.y_stride, + xd->dst.y_stride, + xd->eobs[n]); } } for (n = 0; n < uv_count; n++) { const int x_idx = n % uv_size; const int y_idx = n / uv_size; - const int uv_offset = (y_idx * 4) * mb->dst.uv_stride + (x_idx * 4); - mb->itxm_add(mb->qcoeff + u_qcoeff_offset + n * 4 * 4, - mb->block[16].dequant, - mb->dst.u_buffer + uv_offset, - mb->dst.u_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[u_eob_offset + n]); - mb->itxm_add(mb->qcoeff + v_qcoeff_offset + n * 4 * 4, - mb->block[20].dequant, - mb->dst.v_buffer + uv_offset, - mb->dst.v_buffer + uv_offset, - mb->dst.uv_stride, mb->dst.uv_stride, - mb->eobs[v_eob_offset + n]); + const int uv_offset = (y_idx * 4) * xd->dst.uv_stride + (x_idx * 4); + xd->itxm_add(BLOCK_OFFSET(xd->plane[1].qcoeff, n, 16), + xd->block[16].dequant, + xd->dst.u_buffer + uv_offset, + xd->dst.u_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[u_eob_offset + n]); + xd->itxm_add(BLOCK_OFFSET(xd->plane[2].qcoeff, n, 16), + xd->block[20].dequant, + xd->dst.v_buffer + uv_offset, + xd->dst.v_buffer + uv_offset, + xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[v_eob_offset + n]); } } @@ -698,16 +708,16 @@ static void decode_sb64(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, for (n = 0; n < 4; n++) { const int x_idx = n & 1, y_idx = n >> 1; const int y_offset = x_idx * 32 + y_idx * xd->dst.y_stride * 32; - vp9_dequant_idct_add_32x32(xd->qcoeff + n * 1024, + vp9_dequant_idct_add_32x32(BLOCK_OFFSET(xd->plane[0].qcoeff, n, 1024), xd->block[0].dequant, xd->dst.y_buffer + y_offset, xd->dst.y_buffer + y_offset, xd->dst.y_stride, xd->dst.y_stride, xd->eobs[n * 64]); } - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096, + vp9_dequant_idct_add_32x32(xd->plane[1].qcoeff, xd->block[16].dequant, xd->dst.u_buffer, xd->dst.u_buffer, xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[256]); - vp9_dequant_idct_add_32x32(xd->qcoeff + 4096 + 1024, + vp9_dequant_idct_add_32x32(xd->plane[2].qcoeff, xd->block[20].dequant, xd->dst.v_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->dst.uv_stride, xd->eobs[320]); break; @@ -776,15 +786,18 @@ static void decode_sb32(VP9D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col, } else { switch (xd->mode_info_context->mbmi.txfm_size) { case TX_32X32: - vp9_dequant_idct_add_32x32(xd->qcoeff, xd->block[0].dequant, + vp9_dequant_idct_add_32x32(xd->plane[0].qcoeff, xd->block[0].dequant, xd->dst.y_buffer, xd->dst.y_buffer, xd->dst.y_stride, xd->dst.y_stride, xd->eobs[0]); - vp9_dequant_idct_add_uv_block_16x16_c(xd->qcoeff + 1024, - xd->block[16].dequant, - xd->dst.u_buffer, - xd->dst.v_buffer, - xd->dst.uv_stride, xd); + vp9_dequant_idct_add_16x16(xd->plane[1].qcoeff, xd->block[16].dequant, + xd->dst.u_buffer, xd->dst.u_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->eobs[64]); + vp9_dequant_idct_add_16x16(xd->plane[2].qcoeff, xd->block[16].dequant, + xd->dst.v_buffer, xd->dst.v_buffer, + xd->dst.uv_stride, xd->dst.uv_stride, + xd->eobs[80]); break; case TX_16X16: decode_sb_16x16(xd, 2); @@ -1857,7 +1870,9 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { vp9_build_block_doffsets(xd); // clear out the coeff buffer - vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); + vpx_memset(xd->plane[0].qcoeff, 0, sizeof(xd->plane[0].qcoeff)); + vpx_memset(xd->plane[1].qcoeff, 0, sizeof(xd->plane[1].qcoeff)); + vpx_memset(xd->plane[2].qcoeff, 0, sizeof(xd->plane[2].qcoeff)); // Read the mb_no_coeff_skip flag pc->mb_no_coeff_skip = vp9_read_bit(&header_bc); diff --git a/vp9/decoder/vp9_dequantize.c b/vp9/decoder/vp9_dequantize.c index 9aebcdcfc..c0d1e2adb 100644 --- a/vp9/decoder/vp9_dequantize.c +++ b/vp9/decoder/vp9_dequantize.c @@ -388,14 +388,3 @@ void vp9_dequant_idct_add_32x32_c(int16_t *input, const int16_t *dq, } } } - -void vp9_dequant_idct_add_uv_block_16x16_c(int16_t *q, const int16_t *dq, - uint8_t *dstu, - uint8_t *dstv, - int stride, - MACROBLOCKD *xd) { - vp9_dequant_idct_add_16x16_c(q, dq, dstu, dstu, stride, stride, - xd->eobs[64]); - vp9_dequant_idct_add_16x16_c(q + 256, dq, dstv, dstv, stride, stride, - xd->eobs[80]); -} diff --git a/vp9/decoder/vp9_dequantize.h b/vp9/decoder/vp9_dequantize.h index 933108dae..bb72bb294 100644 --- a/vp9/decoder/vp9_dequantize.h +++ b/vp9/decoder/vp9_dequantize.h @@ -40,10 +40,9 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, unsigned char *pre, - unsigned char *dst_u, - unsigned char *dst_v, + unsigned char *dst, int stride, - struct macroblockd *xd); + uint16_t *eobs); void vp9_ht_dequant_idct_add_c(TX_TYPE tx_type, int16_t *input, const int16_t *dq, unsigned char *pred, unsigned char *dest, diff --git a/vp9/decoder/vp9_detokenize.c b/vp9/decoder/vp9_detokenize.c index e55826379..7801c084a 100644 --- a/vp9/decoder/vp9_detokenize.c +++ b/vp9/decoder/vp9_detokenize.c @@ -389,18 +389,31 @@ static INLINE int decode_sb(VP9D_COMP* const pbi, const int seg_eob = get_eob(xd, segment_id, eob_max); int i, eobtotal = 0; + assert(count == offset * 3 / 2); + // luma blocks for (i = 0; i < offset; i += inc) { const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob, - xd->qcoeff + i * 16, tx_size); + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + tx_size); xd->eobs[i] = c; eobtotal += c; } // chroma blocks - for (i = offset; i < count; i += inc) { + for (i = offset; i < offset * 5 / 4; i += inc) { + const int b = i - offset; + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[1].qcoeff, b, 16), + tx_size); + xd->eobs[i] = c; + eobtotal += c; + } + for (i = offset * 5 / 4; i < count; i += inc) { + const int b = i - offset * 5 / 4; const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, tx_size); + BLOCK_OFFSET(xd->plane[2].qcoeff, b, 16), + tx_size); xd->eobs[i] = c; eobtotal += c; } @@ -415,20 +428,24 @@ int vp9_decode_sb_tokens(VP9D_COMP* const pbi, case TX_32X32: { // 32x32 luma block const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int eobtotal = 0, seg_eob; int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 1024), xd->qcoeff, TX_32X32); + get_eob(xd, segment_id, 1024), + xd->plane[0].qcoeff, TX_32X32); xd->eobs[0] = c; eobtotal += c; // 16x16 chroma blocks seg_eob = get_eob(xd, segment_id, 256); - for (i = 64; i < 96; i += 16) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, - xd->qcoeff + i * 16, TX_16X16); - xd->eobs[i] = c; - eobtotal += c; - } + + c = decode_coefs(pbi, xd, bc, 64, PLANE_TYPE_UV, seg_eob, + xd->plane[1].qcoeff, TX_16X16); + xd->eobs[64] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 80, PLANE_TYPE_UV, seg_eob, + xd->plane[2].qcoeff, TX_16X16); + xd->eobs[80] = c; + eobtotal += c; return eobtotal; } case TX_16X16: @@ -465,22 +482,26 @@ static int vp9_decode_mb_tokens_16x16(VP9D_COMP* const pbi, MACROBLOCKD* const xd, BOOL_DECODER* const bc) { const int segment_id = xd->mode_info_context->mbmi.segment_id; - int i, eobtotal = 0, seg_eob; + int eobtotal = 0, seg_eob; // Luma block int c = decode_coefs(pbi, xd, bc, 0, PLANE_TYPE_Y_WITH_DC, - get_eob(xd, segment_id, 256), xd->qcoeff, TX_16X16); + get_eob(xd, segment_id, 256), + xd->plane[0].qcoeff, TX_16X16); xd->eobs[0] = c; eobtotal += c; // 8x8 chroma blocks seg_eob = get_eob(xd, segment_id, 64); - for (i = 16; i < 24; i += 4) { - c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); - xd->eobs[i] = c; - eobtotal += c; - } + + c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, + seg_eob, xd->plane[1].qcoeff, TX_8X8); + xd->eobs[16] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, + seg_eob, xd->plane[2].qcoeff, TX_8X8); + xd->eobs[20] = c; + eobtotal += c; return eobtotal; } @@ -493,8 +514,9 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, // luma blocks int seg_eob = get_eob(xd, segment_id, 64); for (i = 0; i < 16; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, - seg_eob, xd->block[i].qcoeff, TX_8X8); + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_Y_WITH_DC, seg_eob, + BLOCK_OFFSET(xd->plane[0].qcoeff, i, 16), + TX_8X8); xd->eobs[i] = c; eobtotal += c; } @@ -504,19 +526,31 @@ static int vp9_decode_mb_tokens_8x8(VP9D_COMP* const pbi, xd->mode_info_context->mbmi.mode == SPLITMV) { // use 4x4 transform for U, V components in I8X8/splitmv prediction mode seg_eob = get_eob(xd, segment_id, 16); - for (i = 16; i < 24; i++) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_4X4); + for (i = 16; i < 20; i++) { + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[1].qcoeff, i - 16, 16), + TX_4X4); xd->eobs[i] = c; eobtotal += c; } - } else { - for (i = 16; i < 24; i += 4) { - const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, - seg_eob, xd->block[i].qcoeff, TX_8X8); + for (i = 20; i < 24; i++) { + const int c = decode_coefs(pbi, xd, bc, i, PLANE_TYPE_UV, seg_eob, + BLOCK_OFFSET(xd->plane[2].qcoeff, i - 20, 16), + TX_4X4); xd->eobs[i] = c; eobtotal += c; } + } else { + int c; + + c = decode_coefs(pbi, xd, bc, 16, PLANE_TYPE_UV, seg_eob, + xd->plane[1].qcoeff, TX_8X8); + xd->eobs[16] = c; + eobtotal += c; + c = decode_coefs(pbi, xd, bc, 20, PLANE_TYPE_UV, seg_eob, + xd->plane[2].qcoeff, TX_8X8); + xd->eobs[20] = c; + eobtotal += c; } return eobtotal; @@ -526,7 +560,7 @@ static int decode_coefs_4x4(VP9D_COMP *dx, MACROBLOCKD *xd, BOOL_DECODER* const bc, PLANE_TYPE type, int i, int seg_eob) { const int c = decode_coefs(dx, xd, bc, i, type, seg_eob, - xd->block[i].qcoeff, TX_4X4); + MB_SUBBLOCK_FIELD(xd, qcoeff, i), TX_4X4); xd->eobs[i] = c; return c; } diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index d74b61919..a301a24a7 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -32,35 +32,20 @@ void vp9_dequant_idct_add_y_block_c(int16_t *q, const int16_t *dq, } void vp9_dequant_idct_add_uv_block_c(int16_t *q, const int16_t *dq, - uint8_t *pre, uint8_t *dstu, - uint8_t *dstv, int stride, - MACROBLOCKD *xd) { + uint8_t *pre, uint8_t *dst, + int stride, uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstu, 8, stride, - xd->eobs[16 + i * 2 + j]); - q += 16; - pre += 4; - dstu += 4; - } - - pre += 32 - 8; - dstu += 4 * stride - 8; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add(q, dq, pre, dstv, 8, stride, - xd->eobs[20 + i * 2 + j]); - q += 16; - pre += 4; - dstv += 4; + vp9_dequant_idct_add(q, dq, pre, dst, 8, stride, eobs[i * 2 + j]); + q += 16; + pre += 4; + dst += 4; } pre += 32 - 8; - dstv += 4 * stride - 8; + dst += 4 * stride - 8; } } @@ -82,19 +67,6 @@ void vp9_dequant_idct_add_y_block_8x8_c(int16_t *q, const int16_t *dq, xd->eobs[12]); } -void vp9_dequant_idct_add_uv_block_8x8_c(int16_t *q, const int16_t *dq, - uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, - int stride, MACROBLOCKD *xd) { - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstu, 8, stride, xd->eobs[16]); - - q += 64; - pre += 64; - - vp9_dequant_idct_add_8x8_c(q, dq, pre, dstv, 8, stride, xd->eobs[20]); -} - void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *pre, uint8_t *dst, @@ -117,36 +89,22 @@ void vp9_dequant_idct_add_y_block_lossless_c(int16_t *q, const int16_t *dq, void vp9_dequant_idct_add_uv_block_lossless_c(int16_t *q, const int16_t *dq, uint8_t *pre, - uint8_t *dstu, - uint8_t *dstv, + uint8_t *dst, int stride, - MACROBLOCKD *xd) { + uint16_t *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstu, 8, stride, - xd->eobs[16 + i * 2 + j]); - q += 16; - pre += 4; - dstu += 4; - } - - pre += 32 - 8; - dstu += 4 * stride - 8; - } - - for (i = 0; i < 2; i++) { - for (j = 0; j < 2; j++) { - vp9_dequant_idct_add_lossless_c(q, dq, pre, dstv, 8, stride, - xd->eobs[20 + i * 2 + j]); - q += 16; - pre += 4; - dstv += 4; + vp9_dequant_idct_add_lossless_c(q, dq, pre, dst, 8, stride, + eobs[i * 2 + j]); + q += 16; + pre += 4; + dst += 4; } - pre += 32 - 8; - dstv += 4 * stride - 8; + pre += 32 - 8; + dst += 4 * stride - 8; } } diff --git a/vp9/encoder/vp9_asm_enc_offsets.c b/vp9/encoder/vp9_asm_enc_offsets.c index e174a894a..1a770dcf7 100644 --- a/vp9/encoder/vp9_asm_enc_offsets.c +++ b/vp9/encoder/vp9_asm_enc_offsets.c @@ -29,9 +29,7 @@ DEFINE(vp9_block_zbin_extra, offsetof(BLOCK, zbin_extra)); DEFINE(vp9_block_zrun_zbin_boost, offsetof(BLOCK, zrun_zbin_boost)); DEFINE(vp9_block_quant_shift, offsetof(BLOCK, quant_shift)); -DEFINE(vp9_blockd_qcoeff, offsetof(BLOCKD, qcoeff)); DEFINE(vp9_blockd_dequant, offsetof(BLOCKD, dequant)); -DEFINE(vp9_blockd_dqcoeff, offsetof(BLOCKD, dqcoeff)); END diff --git a/vp9/encoder/vp9_encodeintra.c b/vp9/encoder/vp9_encodeintra.c index eddacb872..883038b7e 100644 --- a/vp9/encoder/vp9_encodeintra.c +++ b/vp9/encoder/vp9_encodeintra.c @@ -16,6 +16,8 @@ #include "vp9/common/vp9_invtrans.h" #include "vp9/encoder/vp9_encodeintra.h" +static void encode_intra4x4block(MACROBLOCK *x, int ib); + int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { MB_MODE_INFO * mbmi = &x->e_mbd.mode_info_context->mbmi; (void) cpi; @@ -31,18 +33,21 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred) { for (i = 0; i < 16; i++) { x->e_mbd.block[i].bmi.as_mode.first = B_DC_PRED; - vp9_encode_intra4x4block(x, i); + encode_intra4x4block(x, i); } } return vp9_get_mb_ss(x->src_diff); } -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { +static void encode_intra4x4block(MACROBLOCK *x, int ib) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; + MACROBLOCKD * const xd = &x->e_mbd; TX_TYPE tx_type; + assert(ib < 16); + #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(&x->e_mbd, b); #endif @@ -54,12 +59,14 @@ void vp9_encode_intra4x4block(MACROBLOCK *x, int ib) { if (tx_type != DCT_DCT) { vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(x, ib, tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); + vp9_short_iht4x4(BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + b->diff, 16, tx_type); } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib); vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 32); + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + b->diff, 32); } vp9_recon_b(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); @@ -69,7 +76,7 @@ void vp9_encode_intra4x4mby(MACROBLOCK *mb) { int i; for (i = 0; i < 16; i++) - vp9_encode_intra4x4block(mb, i); + encode_intra4x4block(mb, i); } void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x) { @@ -151,41 +158,47 @@ void vp9_encode_intra8x8(MACROBLOCK *x, int ib) { if (xd->mode_info_context->mbmi.txfm_size == TX_8X8) { int idx = (ib & 0x02) ? (ib + 2) : ib; + int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); + assert(idx < 16); tx_type = get_tx_type_8x8(xd, ib); if (tx_type != DCT_DCT) { vp9_short_fht8x8(be->src_diff, (x->block + idx)->coeff, 16, tx_type); x->quantize_b_8x8(x, idx, tx_type); - vp9_short_iht8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, + vp9_short_iht8x8(dqcoeff, xd->block[ib].diff, 16, tx_type); } else { x->fwd_txm8x8(be->src_diff, (x->block + idx)->coeff, 32); x->quantize_b_8x8(x, idx, DCT_DCT); - vp9_short_idct8x8(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + vp9_short_idct8x8(dqcoeff, xd->block[ib].diff, 32); } } else { for (i = 0; i < 4; i++) { + int idx = ib + iblock[i]; + int16_t * const dqcoeff = BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16); + + assert(idx < 16); b = &xd->block[ib + iblock[i]]; be = &x->block[ib + iblock[i]]; tx_type = get_tx_type_4x4(xd, ib + iblock[i]); if (tx_type != DCT_DCT) { vp9_short_fht4x4(be->src_diff, be->coeff, 16, tx_type); vp9_ht_quantize_b_4x4(x, ib + iblock[i], tx_type); - vp9_short_iht4x4(b->dqcoeff, b->diff, 16, tx_type); + vp9_short_iht4x4(dqcoeff, b->diff, 16, tx_type); } else if (!(i & 1) && get_tx_type_4x4(xd, ib + iblock[i] + 1) == DCT_DCT) { x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(x, ib + iblock[i], ib + iblock[i] + 1); vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); + dqcoeff, b->diff, 32); vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i] + 1], - (b + 1)->dqcoeff, (b + 1)->diff, 32); + dqcoeff + 16, (b + 1)->diff, 32); i++; } else { x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib + iblock[i]); vp9_inverse_transform_b_4x4(xd, xd->eobs[ib + iblock[i]], - b->dqcoeff, b->diff, 32); + dqcoeff, b->diff, 32); } } } @@ -206,9 +219,12 @@ void vp9_encode_intra8x8mby(MACROBLOCK *x) { } static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { + MACROBLOCKD * const xd = &x->e_mbd; BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; + int16_t * const dqcoeff = MB_SUBBLOCK_FIELD(xd, dqcoeff, ib); + assert(ib >= 16 && ib < 24); vp9_intra_uv4x4_predict(&x->e_mbd, b, mode, b->predictor); vp9_subtract_b(be, b, 8); @@ -216,7 +232,7 @@ static void encode_intra_uv4x4(MACROBLOCK *x, int ib, int mode) { x->fwd_txm4x4(be->src_diff, be->coeff, 16); x->quantize_b_4x4(x, ib); vp9_inverse_transform_b_4x4(&x->e_mbd, x->e_mbd.eobs[ib], - b->dqcoeff, b->diff, 16); + dqcoeff, b->diff, 16); vp9_recon_uv_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride); diff --git a/vp9/encoder/vp9_encodeintra.h b/vp9/encoder/vp9_encodeintra.h index 0b19b5652..6576c94d2 100644 --- a/vp9/encoder/vp9_encodeintra.h +++ b/vp9/encoder/vp9_encodeintra.h @@ -17,7 +17,6 @@ int vp9_encode_intra(VP9_COMP *cpi, MACROBLOCK *x, int use_16x16_pred); void vp9_encode_intra16x16mby(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra16x16mbuv(VP9_COMMON *const cm, MACROBLOCK *x); void vp9_encode_intra4x4mby(MACROBLOCK *mb); -void vp9_encode_intra4x4block(MACROBLOCK *x, int ib); void vp9_encode_intra8x8mby(MACROBLOCK *x); void vp9_encode_intra8x8mbuv(MACROBLOCK *x); void vp9_encode_intra8x8(MACROBLOCK *x, int ib); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 27015773f..a30268886 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -549,9 +549,10 @@ static void optimize_b(VP9_COMMON *const cm, MACROBLOCKD *const xd = &mb->e_mbd; vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; + const struct plane_block_idx pb_idx = plane_block_idx(xd, ib); const int16_t *coeff_ptr = mb->coeff + ib * 16; - int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + ib * 16; + int16_t *qcoeff_ptr; + int16_t *dqcoeff_ptr; int eob = xd->eobs[ib], final_eob, sz = 0; const int i0 = 0; int rc, x, next, i; @@ -582,6 +583,8 @@ static void optimize_b(VP9_COMMON *const cm, nzc0 = nzc1 = nzc; #endif + dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); + qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); switch (tx_size) { default: case TX_4X4: { diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 881fce50f..826bee4c3 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -39,8 +39,9 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { int zbin; int x, y, z, sz; int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; + // ht is luma-only + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16); int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; @@ -110,14 +111,17 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx) { MACROBLOCKD *const xd = &mb->e_mbd; const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; int i, rc, eob; int zbin; int x, y, z, sz; int16_t *coeff_ptr = mb->coeff + b_idx * 16; - int16_t *qcoeff_ptr = xd->qcoeff + b_idx * 16; - int16_t *dqcoeff_ptr = xd->dqcoeff + b_idx * 16; + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, + pb_idx.block, 16); int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; @@ -186,9 +190,13 @@ void vp9_quantize_mby_4x4(MACROBLOCK *x) { void vp9_quantize_mbuv_4x4(MACROBLOCK *x) { int i; + const MACROBLOCKD * const xd = &x->e_mbd; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; for (i = 16; i < 24; i++) x->quantize_b_4x4(x, i); + xd->mode_info_context->mbmi.sb_type = real_sb_type; } void vp9_quantize_mb_4x4(MACROBLOCK *x) { @@ -198,9 +206,12 @@ void vp9_quantize_mb_4x4(MACROBLOCK *x) { void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; - int16_t *qcoeff_ptr = xd->qcoeff + 16 * b_idx; - int16_t *dqcoeff_ptr = xd->dqcoeff + 16 * b_idx; + const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx); const int c_idx = plane_idx(xd, b_idx); + int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); + int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, + pb_idx.block, 16); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; const int *pt_scan; @@ -323,6 +334,9 @@ void vp9_quantize_mby_8x8(MACROBLOCK *x) { void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { int i; + const MACROBLOCKD * const xd = &x->e_mbd; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; #if CONFIG_CODE_NONZEROCOUNT for (i = 16; i < 24; i ++) { @@ -331,6 +345,7 @@ void vp9_quantize_mbuv_8x8(MACROBLOCK *x) { #endif for (i = 16; i < 24; i += 4) x->quantize_b_8x8(x, i, DCT_DCT); + xd->mode_info_context->mbmi.sb_type = real_sb_type; } void vp9_quantize_mb_8x8(MACROBLOCK *x) { @@ -418,6 +433,7 @@ static void quantize(int16_t *zbin_boost_orig_ptr, void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { MACROBLOCKD *const xd = &mb->e_mbd; const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; const int *pt_scan; @@ -438,8 +454,8 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { mb->coeff + 16 * b_idx, 256, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->qcoeff + 16 * b_idx, - xd->dqcoeff + 16 * b_idx, + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), + BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), d->dequant, b->zbin_extra, &xd->eobs[b_idx], @@ -452,6 +468,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { MACROBLOCKD *const xd = &mb->e_mbd; const int c_idx = plane_idx(xd, b_idx); + const struct plane_block_idx pb_idx = plane_block_idx(xd, b_idx); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; @@ -460,8 +477,8 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx) { 1024, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, - xd->qcoeff + b_idx * 16, - xd->dqcoeff + b_idx * 16, + BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), + BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16), d->dequant, b->zbin_extra, &xd->eobs[b_idx], diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 34adc9915..82c5b5bcd 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -348,35 +348,36 @@ int vp9_block_error_c(int16_t *coeff, int16_t *dqcoeff, int block_size) { } int vp9_mbblock_error_c(MACROBLOCK *mb) { + MACROBLOCKD * const xd = &mb->e_mbd; BLOCK *be; - BLOCKD *bd; - int i, j; - int berror, error = 0; + int i; + int error = 0; for (i = 0; i < 16; i++) { be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - berror = 0; - for (j = 0; j < 16; j++) { - int this_diff = be->coeff[j] - bd->dqcoeff[j]; - berror += this_diff * this_diff; - } - error += berror; + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); } return error; } int vp9_mbuverror_c(MACROBLOCK *mb) { + MACROBLOCKD * const xd = &mb->e_mbd; BLOCK *be; - BLOCKD *bd; int i, error = 0; - for (i = 16; i < 24; i++) { + for (i = 16; i < 20; i++) { be = &mb->block[i]; - bd = &mb->e_mbd.block[i]; - - error += vp9_block_error_c(be->coeff, bd->dqcoeff, 16); + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[1].dqcoeff, i - 16, 16), + 16); + } + for (i = 20; i < 24; i++) { + be = &mb->block[i]; + error += vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[2].dqcoeff, i - 20, 16), + 16); } return error; @@ -438,7 +439,9 @@ static INLINE int cost_coeffs(VP9_COMMON *const cm, MACROBLOCK *mb, int c = 0; int cost = 0, pad; const int *scan, *nb; - const int16_t *qcoeff_ptr = xd->qcoeff + ib * 16; + const struct plane_block_idx pb_idx = plane_block_idx(xd, ib); + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); const int ref = mbmi->ref_frame != INTRA_FRAME; unsigned int (*token_costs)[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = mb->token_costs[tx_size][type][ref]; @@ -858,6 +861,26 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, return error > INT_MAX ? INT_MAX : (int)error; } +static int vp9_sb_uv_block_error_c(int16_t *coeff, + int16_t *dqcoeff0, int16_t *dqcoeff1, + int block_size, int shift) { + int i; + int64_t error = 0; + + for (i = 0; i < block_size / 2; i++) { + unsigned int this_diff = coeff[i] - dqcoeff0[i]; + error += this_diff * this_diff; + } + coeff += block_size / 2; + for (i = 0; i < block_size / 2; i++) { + unsigned int this_diff = coeff[i] - dqcoeff1[i]; + error += this_diff * this_diff; + } + error >>= shift; + + return error > INT_MAX ? INT_MAX : (int)error; +} + static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x) { int cost = 0, b; MACROBLOCKD *const xd = &x->e_mbd; @@ -884,7 +907,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_4x4(x); vp9_quantize_sby_4x4(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_4x4(cm, x); *skippable = vp9_sby_is_skippable_4x4(xd); } @@ -915,7 +938,7 @@ static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_8x8(x); vp9_quantize_sby_8x8(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_8x8(cm, x); *skippable = vp9_sby_is_skippable_8x8(xd); } @@ -946,7 +969,7 @@ static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_16x16(x); vp9_quantize_sby_16x16(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 2); *rate = rdcost_sby_16x16(cm, x); *skippable = vp9_sby_is_skippable_16x16(xd); } @@ -971,7 +994,7 @@ static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_32x32(x); vp9_quantize_sby_32x32(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 1024, 0); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 1024, 0); *rate = rdcost_sby_32x32(cm, x); *skippable = vp9_sby_is_skippable_32x32(xd); } @@ -1022,7 +1045,7 @@ static void super_block64_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_4x4(x); vp9_quantize_sb64y_4x4(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_4x4(cm, x); *skippable = vp9_sb64y_is_skippable_4x4(xd); } @@ -1053,7 +1076,7 @@ static void super_block64_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_8x8(x); vp9_quantize_sb64y_8x8(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_8x8(cm, x); *skippable = vp9_sb64y_is_skippable_8x8(xd); } @@ -1085,7 +1108,7 @@ static void super_block64_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_16x16(x); vp9_quantize_sb64y_16x16(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 2); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 2); *rate = rdcost_sb64y_16x16(cm, x); *skippable = vp9_sb64y_is_skippable_16x16(xd); } @@ -1117,7 +1140,7 @@ static void super_block64_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sb64y_32x32(x); vp9_quantize_sb64y_32x32(x); - *distortion = vp9_sb_block_error_c(x->coeff, xd->dqcoeff, 4096, 0); + *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, 4096, 0); *rate = rdcost_sb64y_32x32(cm, x); *skippable = vp9_sb64y_is_skippable_32x32(xd); } @@ -1163,8 +1186,8 @@ static void copy_predictor_8x8(uint8_t *dst, const uint8_t *predictor) { d[29] = p[29]; } -static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, - BLOCKD *b, B_PREDICTION_MODE *best_mode, +static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int ib, + B_PREDICTION_MODE *best_mode, int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, @@ -1175,6 +1198,8 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, int rate = 0; int distortion; VP9_COMMON *const cm = &cpi->common; + BLOCK *be = x->block + ib; + BLOCKD *b = xd->block + ib; ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; @@ -1188,6 +1213,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, DECLARE_ALIGNED_ARRAY(16, uint8_t, best_predictor, 16 * 4); DECLARE_ALIGNED_ARRAY(16, int16_t, best_dqcoeff, 16); + assert(ib < 16); #if CONFIG_NEWBINTRAMODES b->bmi.as_mode.context = vp9_find_bpred_context(xd, b); #endif @@ -1233,7 +1259,9 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, ratey = cost_coeffs(cm, x, b - xd->block, PLANE_TYPE_Y_WITH_DC, &tempa, &templ, TX_4X4); rate += ratey; - distortion = vp9_block_error(be->coeff, b->dqcoeff, 16) >> 2; + distortion = vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), + 16) >> 2; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); @@ -1247,7 +1275,7 @@ static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, BLOCK *be, *a = tempa; *l = templ; copy_predictor(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 32); + vpx_memcpy(best_dqcoeff, BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 32); } } b->bmi.as_mode.first = (B_PREDICTION_MODE)(*best_mode); @@ -1304,7 +1332,7 @@ static int64_t rd_pick_intra4x4mby_modes(VP9_COMP *cpi, MACROBLOCK *mb, #endif total_rd += rd_pick_intra4x4block( - cpi, mb, mb->block + i, xd->block + i, &best_mode, + cpi, mb, i, &best_mode, bmode_costs, ta + vp9_block2above[TX_4X4][i], tl + vp9_block2left[TX_4X4][i], &r, &ry, &d); @@ -1504,6 +1532,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // note the input and output index mapping int idx = (ib & 0x02) ? (ib + 2) : ib; + assert(ib < 16); for (mode = DC_PRED; mode <= TM_PRED; mode++) { int64_t this_rd; int rate_t = 0; @@ -1526,7 +1555,7 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, // compute quantization mse of 8x8 block distortion = vp9_block_error_c((x->block + idx)->coeff, - (xd->block + idx)->dqcoeff, 64); + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); vpx_memcpy(&ta, a, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(&tl, l, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -1569,7 +1598,9 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, ib + iblock[i]); } - distortion += vp9_block_error_c(be->coeff, b->dqcoeff, 16 << do_two); + distortion += vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[i], 16), + 16 << do_two); rate_t += cost_coeffs(cm, x, ib + iblock[i], PLANE_TYPE_Y_WITH_DC, i&1 ? ta1 : ta0, i&2 ? tl1 : tl0, TX_4X4); @@ -1598,8 +1629,10 @@ static int64_t rd_pick_intra8x8block(VP9_COMP *cpi, MACROBLOCK *x, int ib, best_rd = this_rd; *best_mode = mode; copy_predictor_8x8(best_predictor, b->predictor); - vpx_memcpy(best_dqcoeff, b->dqcoeff, 64); - vpx_memcpy(best_dqcoeff + 32, b->dqcoeff + 64, 64); + vpx_memcpy(best_dqcoeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16), 64); + vpx_memcpy(best_dqcoeff + 32, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib, 16) + 64, 64); } } b->bmi.as_mode.first = (*best_mode); @@ -1742,6 +1775,8 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; if (backup) { vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -1760,6 +1795,7 @@ static int rd_cost_mbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { tl + vp9_block2left[TX_4X4][b], TX_4X4); + xd->mode_info_context->mbmi.sb_type = real_sb_type; return cost; } @@ -1783,6 +1819,8 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { MACROBLOCKD *xd = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta, *tl; + const BLOCK_SIZE_TYPE real_sb_type = xd->mode_info_context->mbmi.sb_type; + xd->mode_info_context->mbmi.sb_type = BLOCK_SIZE_MB16X16; if (backup) { vpx_memcpy(&t_above, xd->above_context, sizeof(ENTROPY_CONTEXT_PLANES)); @@ -1800,6 +1838,7 @@ static int rd_cost_mbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *mb, int backup) { ta + vp9_block2above[TX_8X8][b], tl + vp9_block2left[TX_8X8][b], TX_8X8); + xd->mode_info_context->mbmi.sb_type = real_sb_type; return cost; } @@ -1851,8 +1890,9 @@ static void rd_inter32x32_uv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, vp9_quantize_sbuv_16x16(x); *rate = rd_cost_sbuv_16x16(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 1024, - xd->dqcoeff + 1024, 512, 2); + *distortion = vp9_sb_uv_block_error_c(x->coeff + 1024, + xd->plane[1].dqcoeff, + xd->plane[2].dqcoeff, 512, 2); *skip = vp9_sbuv_is_skippable_16x16(xd); } @@ -2127,8 +2167,9 @@ static void rd_inter64x64_uv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, vp9_quantize_sb64uv_32x32(x); *rate = rd_cost_sb64uv_32x32(cm, x, backup); - *distortion = vp9_sb_block_error_c(x->coeff + 4096, - xd->dqcoeff + 4096, 2048, 0); + *distortion = vp9_sb_uv_block_error_c(x->coeff + 4096, + xd->plane[1].dqcoeff, + xd->plane[2].dqcoeff, 2048, 0); *skip = vp9_sb64uv_is_skippable_32x32(xd); } @@ -2466,7 +2507,8 @@ static int64_t encode_inter_mb_segment(VP9_COMMON *const cm, vp9_subtract_b(be, bd, 16); x->fwd_txm4x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4(x, i); - thisdistortion = vp9_block_error(be->coeff, bd->dqcoeff, 16); + thisdistortion = vp9_block_error(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, i, 16), 16); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, i, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_4X4][i], @@ -2508,11 +2550,12 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, const int use_second_ref = xd->mode_info_context->mbmi.second_ref_frame > 0; int which_mv; - int idx = (ib & 8) + ((ib & 2) << 1); - BLOCKD *bd = &xd->block[ib], *bd2 = &xd->block[idx]; + const int idx = (ib & 8) + ((ib & 2) << 1); + BLOCKD *bd = &xd->block[ib]; BLOCK *be = &x->block[ib], *be2 = &x->block[idx]; int thisdistortion; + assert(idx < 16); for (which_mv = 0; which_mv < 1 + use_second_ref; ++which_mv) { uint8_t **base_pre = which_mv ? bd->base_second_pre : bd->base_pre; @@ -2532,7 +2575,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, if (otherrd) { x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); + thisdistortion = vp9_block_error_c(be2->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_8X8; othercost += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, @@ -2546,7 +2590,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, be = &x->block[ib + iblock[j]]; x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); + thisdistortion = vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, ib + iblock[j], PLANE_TYPE_Y_WITH_DC, @@ -2563,11 +2608,11 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, } else /* 8x8 */ { if (otherrd) { for (j = 0; j < 4; j += 2) { - BLOCKD *bd = &xd->block[ib + iblock[j]]; BLOCK *be = &x->block[ib + iblock[j]]; x->fwd_txm8x4(be->src_diff, be->coeff, 32); x->quantize_b_4x4_pair(x, ib + iblock[j], ib + iblock[j] + 1); - thisdistortion = vp9_block_error_c(be->coeff, bd->dqcoeff, 32); + thisdistortion = vp9_block_error_c(be->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, ib + iblock[j], 16), 32); otherdist += thisdistortion; xd->mode_info_context->mbmi.txfm_size = TX_4X4; othercost += @@ -2586,7 +2631,8 @@ static int64_t encode_inter_mb_segment_8x8(VP9_COMMON *const cm, } x->fwd_txm8x8(be->src_diff, be2->coeff, 32); x->quantize_b_8x8(x, idx, DCT_DCT); - thisdistortion = vp9_block_error_c(be2->coeff, bd2->dqcoeff, 64); + thisdistortion = vp9_block_error_c(be2->coeff, + BLOCK_OFFSET(xd->plane[0].dqcoeff, idx, 16), 64); *distortion += thisdistortion; *labelyrate += cost_coeffs(cm, x, idx, PLANE_TYPE_Y_WITH_DC, ta + vp9_block2above[TX_8X8][idx], diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 8f9e9da69..ab286fd8a 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -123,7 +123,9 @@ static void tokenize_b(VP9_COMP *cpi, int c = 0; const int eob = xd->eobs[ib]; /* one beyond last nonzero coeff */ TOKENEXTRA *t = *tp; /* store tokens starting here */ - int16_t *qcoeff_ptr = xd->qcoeff + 16 * ib; + const struct plane_block_idx pb_idx = plane_block_idx(xd, ib); + const int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, + pb_idx.block, 16); int seg_eob, default_eob, pad; const int segment_id = mbmi->segment_id; const BLOCK_SIZE_TYPE sb_type = mbmi->sb_type; diff --git a/vp9/encoder/x86/vp9_encodeopt.asm b/vp9/encoder/x86/vp9_encodeopt.asm index 90c793d4f..51314a7a8 100644 --- a/vp9/encoder/x86/vp9_encodeopt.asm +++ b/vp9/encoder/x86/vp9_encodeopt.asm @@ -260,117 +260,3 @@ sym(vp9_mbblock_error_xmm_impl): UNSHADOW_ARGS pop rbp ret - - -;int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_mmx_impl) PRIVATE -sym(vp9_mbuverror_mmx_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor mm7, mm7 - -.mbuverror_loop_mmx: - - movq mm1, [rsi] - movq mm2, [rdi] - - psubw mm1, mm2 - pmaddwd mm1, mm1 - - - movq mm3, [rsi+8] - movq mm4, [rdi+8] - - psubw mm3, mm4 - pmaddwd mm3, mm3 - - - paddd mm7, mm1 - paddd mm7, mm3 - - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop_mmx - - movq mm0, mm7 - psrlq mm7, 32 - - paddd mm0, mm7 - movq rax, mm0 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret - - -;int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -global sym(vp9_mbuverror_xmm_impl) PRIVATE -sym(vp9_mbuverror_xmm_impl): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 2 - push rsi - push rdi - ; end prolog - - - mov rsi, arg(0) ;s_ptr - mov rdi, arg(1) ;d_ptr - - mov rcx, 16 - pxor xmm3, xmm3 - -.mbuverror_loop: - - movdqa xmm1, [rsi] - movdqa xmm2, [rdi] - - psubw xmm1, xmm2 - pmaddwd xmm1, xmm1 - - paddd xmm3, xmm1 - - add rsi, 16 - add rdi, 16 - - dec rcx - jnz .mbuverror_loop - - pxor xmm0, xmm0 - movdqa xmm1, xmm3 - - movdqa xmm2, xmm1 - punpckldq xmm1, xmm0 - - punpckhdq xmm2, xmm0 - paddd xmm1, xmm2 - - movdqa xmm2, xmm1 - - psrldq xmm1, 8 - paddd xmm1, xmm2 - - movq rax, xmm1 - - pop rdi - pop rsi - ; begin epilog - UNSHADOW_ARGS - pop rbp - ret diff --git a/vp9/encoder/x86/vp9_x86_csystemdependent.c b/vp9/encoder/x86/vp9_x86_csystemdependent.c index 2bf32c569..9557af119 100644 --- a/vp9/encoder/x86/vp9_x86_csystemdependent.c +++ b/vp9/encoder/x86/vp9_x86_csystemdependent.c @@ -26,17 +26,10 @@ void vp9_short_fdct8x4_mmx(short *input, short *output, int pitch) { int vp9_mbblock_error_mmx_impl(short *coeff_ptr, short *dcoef_ptr); int vp9_mbblock_error_mmx(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; + short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff; return vp9_mbblock_error_mmx_impl(coeff_ptr, dcoef_ptr); } -int vp9_mbuverror_mmx_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_mmx(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_mmx_impl(s_ptr, d_ptr); -} - void vp9_subtract_b_mmx_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch); @@ -54,17 +47,10 @@ void vp9_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch) { int vp9_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr); int vp9_mbblock_error_xmm(MACROBLOCK *mb) { short *coeff_ptr = mb->block[0].coeff; - short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; + short *dcoef_ptr = mb->e_mbd.plane[0].dqcoeff; return vp9_mbblock_error_xmm_impl(coeff_ptr, dcoef_ptr); } -int vp9_mbuverror_xmm_impl(short *s_ptr, short *d_ptr); -int vp9_mbuverror_xmm(MACROBLOCK *mb) { - short *s_ptr = &mb->coeff[256]; - short *d_ptr = &mb->e_mbd.dqcoeff[256]; - return vp9_mbuverror_xmm_impl(s_ptr, d_ptr); -} - void vp9_subtract_b_sse2_impl(unsigned char *z, int src_stride, short *diff, unsigned char *predictor, int pitch);