From: John Koleszar Date: Tue, 23 Apr 2013 16:51:09 +0000 (-0700) Subject: Convert coeff to per-plane MACROBLOCK data X-Git-Tag: v1.3.0~1106^2~164^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=138ec38cab4e8093d459600cd744720b036f2d50;p=libvpx Convert coeff to per-plane MACROBLOCK data This commit moves the coeff storage from the MACROBLOCK struct to its per-plane part. The next commit will remove the coeff member from the BLOCK structure so that it is consistently accessed per-plane. Also refactors vp9_sb_block_error_c and vp9_sb_uv_block_error_c to be variable subsampling aware. Change-Id: I18c30f87f27c3a012119b6c1970d5fa499804455 --- diff --git a/vp9/encoder/vp9_block.h b/vp9/encoder/vp9_block.h index a054269aa..7b5b158da 100644 --- a/vp9/encoder/vp9_block.h +++ b/vp9/encoder/vp9_block.h @@ -84,12 +84,12 @@ typedef struct { struct macroblock_plane { DECLARE_ALIGNED(16, int16_t, src_diff[64*64]); + DECLARE_ALIGNED(16, int16_t, coeff[64*64]); }; typedef struct macroblock MACROBLOCK; struct macroblock { struct macroblock_plane plane[MAX_MB_PLANE]; - DECLARE_ALIGNED(16, int16_t, coeff[64*64+32*32*2]); // 16 Y blocks, 4 U blocks, 4 V blocks, BLOCK block[24]; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index fd65dfa45..27d386108 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1736,8 +1736,12 @@ void vp9_encode_frame(VP9_COMP *cpi) { void vp9_setup_block_ptrs(MACROBLOCK *x) { int i; - for (i = 0; i < 24; i++) - x->block[i].coeff = x->coeff + i * 16; + for (i = 0; i < 16; i++) + x->block[i].coeff = x->plane[0].coeff + i * 16; + for (i = 16; i < 20; i++) + x->block[i].coeff = x->plane[1].coeff + (i - 16) * 16; + for (i = 20; i < 24; i++) + x->block[i].coeff = x->plane[2].coeff + (i - 20) * 16; } void vp9_build_block_offsets(MACROBLOCK *x) { diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index b6fe6dfa7..1994bb2ec 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -78,7 +78,7 @@ void vp9_transform_sby_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1), y_idx = n >> bwl; vp9_short_fdct32x32(x->plane[0].src_diff + y_idx * stride * 32 + x_idx * 32, - x->coeff + n * 1024, stride * 2); + x->plane[0].coeff + n * 1024, stride * 2); } } @@ -97,10 +97,10 @@ void vp9_transform_sby_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { if (tx_type != DCT_DCT) { vp9_short_fht16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16, - x->coeff + n * 256, stride, tx_type); + x->plane[0].coeff + n * 256, stride, tx_type); } else { x->fwd_txm16x16(x->plane[0].src_diff + y_idx * stride * 16 + x_idx * 16, - x->coeff + n * 256, stride * 2); + x->plane[0].coeff + n * 256, stride * 2); } } } @@ -118,10 +118,10 @@ void vp9_transform_sby_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { if (tx_type != DCT_DCT) { vp9_short_fht8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->coeff + n * 64, stride, tx_type); + x->plane[0].coeff + n * 64, stride, tx_type); } else { x->fwd_txm8x8(x->plane[0].src_diff + y_idx * stride * 8 + x_idx * 8, - x->coeff + n * 64, stride * 2); + x->plane[0].coeff + n * 64, stride * 2); } } } @@ -139,10 +139,10 @@ void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { if (tx_type != DCT_DCT) { vp9_short_fht4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->coeff + n * 16, stride, tx_type); + x->plane[0].coeff + n * 16, stride, tx_type); } else { x->fwd_txm4x4(x->plane[0].src_diff + y_idx * stride * 4 + x_idx * 4, - x->coeff + n * 16, stride * 2); + x->plane[0].coeff + n * 16, stride * 2); } } } @@ -150,15 +150,12 @@ void vp9_transform_sby_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { void vp9_transform_sbuv_32x32(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { assert(bsize == BLOCK_SIZE_SB64X64); vp9_clear_system_state(); - vp9_short_fdct32x32(x->plane[1].src_diff, - x->coeff + 4096, 64); - vp9_short_fdct32x32(x->plane[2].src_diff, - x->coeff + 4096 + 1024, 64); + vp9_short_fdct32x32(x->plane[1].src_diff, x->plane[1].coeff, 64); + vp9_short_fdct32x32(x->plane[2].src_diff, x->plane[2].coeff, 64); } void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); - const int uoff = (16 * 16) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 16 << (bwl - 1); int n; @@ -168,15 +165,14 @@ void vp9_transform_sbuv_16x16(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); x->fwd_txm16x16(x->plane[1].src_diff + y_idx * stride * 16 + x_idx * 16, - x->coeff + uoff + n * 256, stride * 2); + x->plane[1].coeff + n * 256, stride * 2); x->fwd_txm16x16(x->plane[2].src_diff + y_idx * stride * 16 + x_idx * 16, - x->coeff + voff + n * 256, stride * 2); + x->plane[2].coeff + n * 256, stride * 2); } } void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; - const int uoff = (8 * 8) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 8 << (bwl - 1); int n; @@ -186,15 +182,14 @@ void vp9_transform_sbuv_8x8(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); x->fwd_txm8x8(x->plane[1].src_diff + y_idx * stride * 8 + x_idx * 8, - x->coeff + uoff + n * 64, stride * 2); + x->plane[1].coeff + n * 64, stride * 2); x->fwd_txm8x8(x->plane[2].src_diff + y_idx * stride * 8 + x_idx * 8, - x->coeff + voff + n * 64, stride * 2); + x->plane[2].coeff + n * 64, stride * 2); } } void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; - const int uoff = (4 * 4) << (bwl + bhl), voff = (uoff * 5) >> 2; const int bw = 1 << (bwl - 1), bh = 1 << (bhl - 1); const int stride = 4 << (bwl - 1); int n; @@ -204,9 +199,9 @@ void vp9_transform_sbuv_4x4(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize) { const int x_idx = n & (bw - 1), y_idx = n >> (bwl - 1); x->fwd_txm4x4(x->plane[1].src_diff + y_idx * stride * 4 + x_idx * 4, - x->coeff + uoff + n * 16, stride * 2); + x->plane[1].coeff + n * 16, stride * 2); x->fwd_txm4x4(x->plane[2].src_diff + y_idx * stride * 4 + x_idx * 4, - x->coeff + voff + n * 16, stride * 2); + x->plane[2].coeff + n * 16, stride * 2); } } @@ -265,7 +260,8 @@ static void optimize_b(VP9_COMMON *const cm, vp9_token_state tokens[1025][2]; unsigned best_index[1025][2]; const struct plane_block_idx pb_idx = plane_block_idx(y_blocks, ib); - const int16_t *coeff_ptr = mb->coeff + ib * 16; + const int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, + pb_idx.block, 16); int16_t *qcoeff_ptr; int16_t *dqcoeff_ptr; int eob = xd->plane[pb_idx.plane].eobs[pb_idx.block], final_eob, sz = 0; diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index 18f0f4fa0..2ae74543e 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -33,7 +33,7 @@ void vp9_ht_quantize_b_4x4(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type) { int i, rc, eob; int zbin; int x, y, z, sz; - int16_t *coeff_ptr = mb->coeff + b_idx * 16; + int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[0].coeff, b_idx, 16); // ht is luma-only int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[0].qcoeff, b_idx, 16); int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[0].dqcoeff, b_idx, 16); @@ -102,7 +102,8 @@ void vp9_regular_quantize_b_4x4(MACROBLOCK *mb, int b_idx, int y_blocks) { int i, rc, eob; int zbin; int x, y, z, sz; - int16_t *coeff_ptr = mb->coeff + b_idx * 16; + int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, + pb_idx.block, 16); int16_t *qcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16); int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, @@ -163,6 +164,8 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, pb_idx.block, 16); int16_t *dqcoeff_ptr = BLOCK_OFFSET(xd->plane[pb_idx.plane].dqcoeff, pb_idx.block, 16); + int16_t *coeff_ptr = BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, + pb_idx.block, 16); BLOCK *const b = &mb->block[c_idx]; BLOCKD *const d = &xd->block[c_idx]; const int *pt_scan; @@ -191,7 +194,6 @@ void vp9_regular_quantize_b_8x8(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, int x, y, z, sz; int zero_run; int16_t *zbin_boost_ptr = b->zrun_zbin_boost; - int16_t *coeff_ptr = mb->coeff + 16 * b_idx; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; int16_t *quant_ptr = b->quant; @@ -331,7 +333,7 @@ void vp9_regular_quantize_b_16x16(MACROBLOCK *mb, int b_idx, TX_TYPE tx_type, if (c_idx == 16) assert(pb_idx.plane == 1); if (c_idx == 20) assert(pb_idx.plane == 2); quantize(b->zrun_zbin_boost, - mb->coeff + 16 * b_idx, + BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), 256, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, BLOCK_OFFSET(xd->plane[pb_idx.plane].qcoeff, pb_idx.block, 16), @@ -353,7 +355,7 @@ void vp9_regular_quantize_b_32x32(MACROBLOCK *mb, int b_idx, int y_blocks) { if (c_idx == 16) assert(pb_idx.plane == 1); if (c_idx == 20) assert(pb_idx.plane == 2); quantize(b->zrun_zbin_boost, - mb->coeff + b_idx * 16, + BLOCK_OFFSET(mb->plane[pb_idx.plane].coeff, pb_idx.block, 16), 1024, b->skip_block, b->zbin, b->round, b->quant, b->quant_shift, diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 5517b1574..693da6119 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -648,13 +648,13 @@ static void choose_txfm_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, rd[TX_4X4][1] : rd[TX_8X8][1]; } -static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, - int block_size, int shift) { +static int block_error(int16_t *coeff, int16_t *dqcoeff, + int block_size, int shift) { int i; int64_t error = 0; for (i = 0; i < block_size; i++) { - unsigned int this_diff = coeff[i] - dqcoeff[i]; + int this_diff = coeff[i] - dqcoeff[i]; error += this_diff * this_diff; } error >>= shift; @@ -662,24 +662,24 @@ static int vp9_sb_block_error_c(int16_t *coeff, int16_t *dqcoeff, return error > INT_MAX ? INT_MAX : (int)error; } -static int vp9_sb_uv_block_error_c(int16_t *coeff, - int16_t *dqcoeff0, int16_t *dqcoeff1, - int block_size, int shift) { - int i; - int64_t error = 0; +static int block_error_sby(MACROBLOCK *x, int block_size, int shift) { + return block_error(x->plane[0].coeff, x->e_mbd.plane[0].dqcoeff, + block_size, shift); +} - for (i = 0; i < block_size / 2; i++) { - unsigned int this_diff = coeff[i] - dqcoeff0[i]; - error += this_diff * this_diff; - } - coeff += block_size / 2; - for (i = 0; i < block_size / 2; i++) { - unsigned int this_diff = coeff[i] - dqcoeff1[i]; - error += this_diff * this_diff; - } - error >>= shift; +static int block_error_sbuv(MACROBLOCK *x, BLOCK_SIZE_TYPE bsize, int shift) { + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); + int64_t sum = 0; + int plane; - return error > INT_MAX ? INT_MAX : (int)error; + for (plane = 1; plane < MAX_MB_PLANE; plane++) { + const int subsampling = x->e_mbd.plane[plane].subsampling_x + + x->e_mbd.plane[plane].subsampling_y; + sum += block_error(x->plane[plane].coeff, x->e_mbd.plane[plane].dqcoeff, + 16 << (bwl + bhl - subsampling), 0); + } + sum >>= shift; + return sum > INT_MAX ? INT_MAX : (int)sum; } static int rdcost_sby_4x4(VP9_COMMON *const cm, MACROBLOCK *x, @@ -716,8 +716,7 @@ static void super_block_yrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, vp9_transform_sby_4x4(x, bsize); vp9_quantize_sby_4x4(x, bsize); - *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, - 16 << (bwl + bhl), 2); + *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); *rate = rdcost_sby_4x4(cm, x, bsize); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -749,15 +748,14 @@ static int rdcost_sby_8x8(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_yrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_8X8; vp9_transform_sby_8x8(x, bsize); vp9_quantize_sby_8x8(x, bsize); - *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, - 64 << (bhl + bwl), 2); + *distortion = block_error_sby(x, 16 << (bhl + bwl), 2); *rate = rdcost_sby_8x8(cm, x, bsize); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -787,15 +785,14 @@ static int rdcost_sby_16x16(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_yrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_16X16; vp9_transform_sby_16x16(x, bsize); vp9_quantize_sby_16x16(x, bsize); - *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, - 256 << (bwl + bhl), 2); + *distortion = block_error_sby(x, 16 << (bwl + bhl), 2); *rate = rdcost_sby_16x16(cm, x, bsize); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -827,15 +824,14 @@ static int rdcost_sby_32x32(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_yrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skippable, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize) - 1, bhl = mb_height_log2(bsize) - 1; + const int bwl = b_width_log2(bsize), bhl = b_height_log2(bsize); MACROBLOCKD *const xd = &x->e_mbd; xd->mode_info_context->mbmi.txfm_size = TX_32X32; vp9_transform_sby_32x32(x, bsize); vp9_quantize_sby_32x32(x, bsize); - *distortion = vp9_sb_block_error_c(x->coeff, xd->plane[0].dqcoeff, - 1024 << (bwl + bhl), 0); + *distortion = block_error_sby(x, 16 << (bwl + bhl), 0); *rate = rdcost_sby_32x32(cm, x, bsize); *skippable = vp9_sby_is_skippable(xd, bsize); } @@ -1385,17 +1381,13 @@ static int rd_cost_sbuv_4x4(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd_4x4(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skip, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize) + 2, bhl = mb_height_log2(bsize) + 2; MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_4x4(x, bsize); vp9_quantize_sbuv_4x4(x, bsize); *rate = rd_cost_sbuv_4x4(cm, x, bsize); - *distortion = vp9_sb_uv_block_error_c(x->coeff + (16 << (bwl + bhl)), - xd->plane[1].dqcoeff, - xd->plane[2].dqcoeff, - 32 << (bwl + bhl - 2), 2); + *distortion = block_error_sbuv(x, bsize, 2); *skip = vp9_sbuv_is_skippable(xd, bsize); } @@ -1430,17 +1422,13 @@ static int rd_cost_sbuv_8x8(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd_8x8(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skip, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize) + 1, bhl = mb_height_log2(bsize) + 1; MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_8x8(x, bsize); vp9_quantize_sbuv_8x8(x, bsize); *rate = rd_cost_sbuv_8x8(cm, x, bsize); - *distortion = vp9_sb_uv_block_error_c(x->coeff + (64 << (bwl + bhl)), - xd->plane[1].dqcoeff, - xd->plane[2].dqcoeff, - 128 << (bwl + bhl - 2), 2); + *distortion = block_error_sbuv(x, bsize, 2); *skip = vp9_sbuv_is_skippable(xd, bsize); } @@ -1475,17 +1463,13 @@ static int rd_cost_sbuv_16x16(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd_16x16(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skip, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize), bhl = mb_height_log2(bsize); MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_16x16(x, bsize); vp9_quantize_sbuv_16x16(x, bsize); *rate = rd_cost_sbuv_16x16(cm, x, bsize); - *distortion = vp9_sb_uv_block_error_c(x->coeff + (256 << (bwl + bhl)), - xd->plane[1].dqcoeff, - xd->plane[2].dqcoeff, - 512 << (bwl + bhl - 2), 2); + *distortion = block_error_sbuv(x, bsize, 2); *skip = vp9_sbuv_is_skippable(xd, bsize); } @@ -1521,17 +1505,13 @@ static int rd_cost_sbuv_32x32(VP9_COMMON *const cm, MACROBLOCK *x, static void super_block_uvrd_32x32(VP9_COMMON *const cm, MACROBLOCK *x, int *rate, int *distortion, int *skip, BLOCK_SIZE_TYPE bsize) { - const int bwl = mb_width_log2(bsize) - 1, bhl = mb_height_log2(bsize) - 1; MACROBLOCKD *const xd = &x->e_mbd; vp9_transform_sbuv_32x32(x, bsize); vp9_quantize_sbuv_32x32(x, bsize); *rate = rd_cost_sbuv_32x32(cm, x, bsize); - *distortion = vp9_sb_uv_block_error_c(x->coeff + (1024 << (bwl + bhl)), - xd->plane[1].dqcoeff, - xd->plane[2].dqcoeff, - 2048 << (bwl + bhl - 2), 0); + *distortion = block_error_sbuv(x, bsize, 0); *skip = vp9_sbuv_is_skippable(xd, bsize); }