From c7846ebc3438aa3e1611398567bf950d0656c590 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Wed, 1 Aug 2012 10:18:25 -0700 Subject: [PATCH] Use 8x8 DCT transform for I8X8 prediction mode Apply 2D-DCT transform of dimension 8x8 to encode prediction residuals of I8X8 mode. Brought back block type 3 probability context model for 8x8 tokens, which is used for the coefficients of Y blocks in I8x8 modes. The coefficient costs estimate of I8X8 mode in rate-distortion is also changed appropriately. Performance results: derf: 0.246 yt: 0.114 std-hd: 0.730 hd: 0.670 Change-Id: If1d970eeb4e1827c9f0d2c5b27d33089b347ea27 --- configure | 1 + vp8/common/default_coef_probs.h | 53 ++++++++++++++++ vp8/common/entropy.h | 4 ++ vp8/decoder/decodframe.c | 25 ++++++++ vp8/decoder/detokenize.c | 38 ++++++++++- vp8/encoder/encodeintra.c | 23 ++++++- vp8/encoder/rdopt.c | 48 ++++++++++++++ vp8/encoder/tokenize.c | 108 ++++++++++++++++++++++++++++++-- 8 files changed, 294 insertions(+), 6 deletions(-) diff --git a/configure b/configure index 525ccd168..269d997b5 100755 --- a/configure +++ b/configure @@ -226,6 +226,7 @@ EXPERIMENT_LIST=" lossless hybridtransform switchable_interp + htrans8x8 " CONFIG_LIST=" external_build diff --git a/vp8/common/default_coef_probs.h b/vp8/common/default_coef_probs.h index d0e114a3d..145faf1ac 100644 --- a/vp8/common/default_coef_probs.h +++ b/vp8/common/default_coef_probs.h @@ -434,4 +434,57 @@ vp8_default_coef_probs_8x8[BLOCK_TYPES_8X8] { 6, 117, 180, 254, 199, 216, 255, 251, 128, 128, 128} } } +#if CONFIG_HTRANS8X8 + , + { /* block Type 3 */ + { /* Coeff Band 0 */ + { 192, 18, 155, 172, 145, 164, 192, 135, 246, 223, 255}, + { 94, 29, 97, 131, 131, 153, 171, 121, 250, 190, 255}, + { 25, 29, 63, 128, 119, 147, 168, 124, 251, 183, 255}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + }, + { /* Coeff Band 1 */ + { 1, 108, 192, 220, 186, 173, 255, 194, 255, 255, 128}, + { 123, 104, 188, 221, 165, 171, 247, 180, 255, 255, 128}, + { 23, 76, 152, 216, 154, 166, 226, 182, 255, 209, 128}, + { 1, 26, 52, 162, 109, 152, 208, 144, 255, 231, 128} + }, + { /* Coeff Band 2 */ + { 1, 57, 179, 220, 156, 175, 210, 158, 255, 223, 128}, + { 48, 57, 134, 212, 151, 170, 219, 185, 255, 248, 128}, + { 4, 35, 63, 189, 120, 156, 221, 159, 255, 241, 128}, + { 1, 17, 23, 110, 97, 143, 187, 120, 255, 234, 128} + }, + { /* Coeff Band 3 */ + { 1, 115, 205, 243, 182, 187, 254, 218, 255, 255, 128}, + { 80, 101, 186, 241, 183, 186, 249, 182, 255, 255, 128}, + { 10, 81, 144, 229, 164, 175, 241, 185, 255, 255, 128}, + { 1, 44, 81, 192, 130, 148, 240, 180, 255, 255, 128} + }, + { /* Coeff Band 4 */ + { 1, 161, 207, 249, 187, 176, 255, 180, 128, 128, 128}, + { 79, 148, 196, 240, 186, 182, 253, 171, 255, 255, 128}, + { 14, 111, 171, 233, 170, 178, 235, 204, 255, 255, 128}, + { 1, 63, 103, 202, 143, 162, 240, 178, 255, 255, 128} + }, + { /* Coeff Band 5 */ + { 1, 101, 202, 239, 185, 184, 252, 186, 255, 255, 128}, + { 43, 67, 166, 237, 178, 190, 246, 194, 255, 255, 128}, + { 4, 49, 85, 220, 140, 168, 253, 182, 255, 255, 128}, + { 1, 24, 35, 144, 93, 135, 239, 159, 255, 253, 128} + }, + { /* Coeff Band 6 */ + { 1, 212, 243, 255, 240, 234, 255, 255, 128, 128, 128}, + { 98, 168, 234, 255, 229, 234, 255, 255, 128, 128, 128}, + { 19, 127, 199, 255, 212, 198, 255, 255, 128, 128, 128}, + { 1, 103, 162, 253, 186, 151, 255, 255, 128, 128, 128} + }, + { /* Coeff Band 7 */ + { 1, 188, 253, 255, 255, 128, 128, 128, 128, 128, 128}, + { 191, 68, 242, 255, 255, 128, 128, 128, 128, 128, 128}, + { 8, 132, 255, 128, 128, 128, 128, 128, 128, 128, 128}, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128} + } + } +#endif }; diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h index 9993741c8..36cae410c 100644 --- a/vp8/common/entropy.h +++ b/vp8/common/entropy.h @@ -63,7 +63,11 @@ extern vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define BLOCK_TYPES 4 +#if CONFIG_HTRANS8X8 +#define BLOCK_TYPES_8X8 4 +#else #define BLOCK_TYPES_8X8 3 +#endif /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. */ diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index c31595d59..47d0faa7b 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -235,6 +235,13 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, xd->mode_info_context->mbmi.txfm_size = TX_8X8; } } + +#if CONFIG_HTRANS8X8 + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + xd->mode_info_context->mbmi.txfm_size = TX_8X8; + } +#endif + tx_type = xd->mode_info_context->mbmi.txfm_size; if (xd->mode_info_context->mbmi.mb_skip_coeff) { @@ -356,11 +363,28 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int i8x8mode; BLOCKD *b; +#if CONFIG_HTRANS8X8 + int idx = (ib & 0x02) ? (ib + 2) : ib; + + short *q = xd->block[idx].qcoeff; + short *dq = xd->block[0].dequant; + unsigned char *pre = xd->block[ib].predictor; + unsigned char *dst = *(xd->block[ib].base_dst) + xd->block[ib].dst; + int stride = xd->dst.y_stride; + + tx_type = TX_4X4; + xd->mode_info_context->mbmi.txfm_size = TX_4X4; +#endif + b = &xd->block[ib]; i8x8mode = b->bmi.as_mode.first; RECON_INVOKE(RTCD_VTABLE(recon), intra8x8_predict) (b, i8x8mode, b->predictor); +#if CONFIG_HTRANS8X8 + vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); + q += 64; +#else for (j = 0; j < 4; j++) { b = &xd->block[ib + iblock[j]]; if (xd->eobs[ib + iblock[j]] > 1) { @@ -374,6 +398,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, ((int *)b->qcoeff)[0] = 0; } } +#endif b = &xd->block[16 + i]; RECON_INVOKE(RTCD_VTABLE(recon), intra_uv4x4_predict) diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c index 1acde4c1e..155877a4c 100644 --- a/vp8/decoder/detokenize.c +++ b/vp8/decoder/detokenize.c @@ -124,7 +124,8 @@ void static count_tokens_adaptive_scan(const MACROBLOCKD *xd, INT16 *qcoeff_ptr, int QIndex = xd->q_index; int active_ht = (QIndex < ACTIVE_HT) && - (xd->mode_info_context->mbmi.mode == B_PRED); + (xd->mode_info_context->mbmi.mode == B_PRED) && + (type == PLANE_TYPE_Y_WITH_DC); if(active_ht) { switch(xd->block[block].bmi.as_mode.tx_type) { @@ -351,8 +352,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { const int segment_id = xd->mode_info_context->mbmi.segment_id; const int seg_active = segfeature_active(xd, segment_id, SEG_LVL_EOB); INT16 *qcoeff_ptr = &xd->qcoeff[0]; + +#if CONFIG_HTRANS8X8 + int bufthred = (xd->mode_info_context->mbmi.mode == I8X8_PRED) ? 16 : 24; + if (xd->mode_info_context->mbmi.mode != B_PRED && + xd->mode_info_context->mbmi.mode != SPLITMV && + xd->mode_info_context->mbmi.mode != I8X8_PRED) { +#else if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV) { +#endif ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[24]; ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[24]; const int *const scan = vp8_default_zig_zag1d; @@ -376,10 +385,16 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); else seg_eob = 64; + +#if CONFIG_HTRANS8X8 + for (i = 0; i < bufthred ; i += 4) { +#else for (i = 0; i < 24; i += 4) { +#endif ENTROPY_CONTEXT *const a = A + vp8_block2above_8x8[i]; ENTROPY_CONTEXT *const l = L + vp8_block2left_8x8[i]; const int *const scan = vp8_default_zig_zag1d_8x8; + if (i == 16) type = PLANE_TYPE_UV; @@ -393,6 +408,27 @@ int vp8_decode_mb_tokens_8x8(VP8D_COMP *pbi, MACROBLOCKD *xd) { qcoeff_ptr += 64; } +#if CONFIG_HTRANS8X8 + if (xd->mode_info_context->mbmi.mode == I8X8_PRED) { + type = PLANE_TYPE_UV; + seg_eob = 16; + + // use 4x4 transform for U, V components in I8X8 prediction mode + for (i = 16; i < 24; i++) { + ENTROPY_CONTEXT *const a = A + vp8_block2above[i]; + ENTROPY_CONTEXT *const l = L + vp8_block2left[i]; + const int *scan = vp8_default_zig_zag1d; + + c = vp8_decode_coefs(pbi, xd, a, l, type, seg_eob, qcoeff_ptr, + i, scan, TX_4X4, coef_bands_x); + a[0] = l[0] = ((eobs[i] = c) != !type); + + eobtotal += c; + qcoeff_ptr += 16; + } + } +#endif + return eobtotal; } diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 46b352ef8..01ae03a23 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -306,6 +306,25 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, } #endif +#if CONFIG_HTRANS8X8 + { + MACROBLOCKD *xd = &x->e_mbd; + int idx = (ib & 0x02) ? (ib + 2) : ib; + + // generate residual blocks + vp8_subtract_4b_c(be, b, 16); + x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->quantize_b_8x8(x->block + idx, xd->block + idx); + vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + + // reconstruct submacroblock + for (i = 0; i < 4; i++) { + b = &xd->block[ib + iblock[i]]; + vp8_recon_b_c(b->predictor, b->diff, *(b->base_dst) + b->dst, + b->dst_stride); + } + } +#else for (i = 0; i < 4; i++) { b = &x->e_mbd.block[ib + iblock[i]]; be = &x->block[ib + iblock[i]]; @@ -314,8 +333,10 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, x->quantize_b(be, b); vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32); RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, - b->diff, *(b->base_dst) + b->dst, b->dst_stride); + b->diff, *(b->base_dst) + b->dst, + b->dst_stride); } +#endif } extern const int vp8_i8x8_block[4]; diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 5fd92a5fa..ad9b4cebf 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -447,6 +447,20 @@ int vp8_block_error_c(short *coeff, short *dqcoeff) { return error; } +#if CONFIG_HTRANS8X8 +int vp8_submb_error_c(short *coeff, short *dqcoeff) { + int i; + int error = 0; + + for (i = 0; i < 64; i++) { + int this_diff = coeff[i] - dqcoeff[i]; + error += this_diff * this_diff; + } + + return error; +} +#endif + int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) { BLOCK *be; BLOCKD *bd; @@ -1175,6 +1189,12 @@ static int rd_pick_intra8x8block( DECLARE_ALIGNED_ARRAY(16, unsigned char, best_predictor, 16 * 8); DECLARE_ALIGNED_ARRAY(16, short, best_dqcoeff, 16 * 4); +#if CONFIG_HTRANS8X8 + // perform transformation of dimension 8x8 + // note the input and output index mapping + int idx = (ib & 0x02) ? (ib + 2) : ib; +#endif + for (mode = DC_PRED; mode <= TM_PRED; mode++) { #if CONFIG_COMP_INTRA_PRED for (mode2 = DC_PRED - 1; mode2 != TM_PRED + 1; mode2++) { @@ -1200,6 +1220,24 @@ static int rd_pick_intra8x8block( vp8_subtract_4b_c(be, b, 16); +#if CONFIG_HTRANS8X8 + x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + x->quantize_b_8x8(x->block + idx, xd->block + idx); + + // compute quantization mse of 8x8 block + distortion = vp8_submb_error_c((x->block + idx)->coeff, + (xd->block + idx)->dqcoeff)>>2; + + ta0 = *(a + vp8_block2above_8x8[idx]); + tl0 = *(l + vp8_block2left_8x8 [idx]); + + rate_t = cost_coeffs_8x8(x, xd->block + idx, PLANE_TYPE_Y_WITH_DC, + &ta0, + &tl0); + rate += rate_t; + ta1 = ta0; + tl1 = tl0; +#else x->vp8_short_fdct8x4(be->src_diff, be->coeff, 32); x->vp8_short_fdct8x4(be->src_diff + 64, be->coeff + 64, 32); @@ -1230,6 +1268,8 @@ static int rd_pick_intra8x8block( rate_t += cost_coeffs(x, xd->block + ib + 5, PLANE_TYPE_Y_WITH_DC, &ta1, &tl1); rate += rate_t; +#endif + this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; @@ -1257,10 +1297,18 @@ static int rd_pick_intra8x8block( b->bmi.as_mode.second = (*best_second_mode); #endif vp8_encode_intra8x8(IF_RTCD(&cpi->rtcd), x, ib); + +#if CONFIG_HTRANS8X8 + *(a + vp8_block2above_8x8[idx]) = besta0; + *(a + vp8_block2above_8x8[idx] + 1) = besta1; + *(l + vp8_block2left_8x8 [idx]) = bestl0; + *(l + vp8_block2left_8x8 [idx] + 1) = bestl1; +#else *(a + vp8_block2above[ib]) = besta0; *(a + vp8_block2above[ib + 1]) = besta1; *(l + vp8_block2above[ib]) = bestl0; *(l + vp8_block2above[ib + 4]) = bestl1; +#endif return best_rd; } diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index 50a31643d..81ba6f2be 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -334,9 +334,8 @@ static void tokenize1st_order_ht( MACROBLOCKD *xd, } // assign scanning order for luma components coded in intra4x4 mode - if( ( ( xd->mode_info_context->mbmi.mode == B_PRED ) || - ( xd->mode_info_context->mbmi.mode == I8X8_PRED ) ) && - ( type == PLANE_TYPE_Y_WITH_DC) ) { + if( (xd->mode_info_context->mbmi.mode == B_PRED) && + (type == PLANE_TYPE_Y_WITH_DC) ) { switch(b_mode) { case B_VE_PRED : case B_VR_PRED : @@ -455,6 +454,84 @@ static void tokenize1st_order_ht( MACROBLOCKD *xd, } #endif + +#if CONFIG_HTRANS8X8 +static void tokenize1st_order_chroma +( + MACROBLOCKD *xd, + TOKENEXTRA **tp, + int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ + VP8_COMP *cpi +) { + unsigned int block; + const BLOCKD *b; + int pt; /* near block/prev token context index */ + int c; + int token; + TOKENEXTRA *t = *tp;/* store tokens starting here */ + const short *qcoeff_ptr; + ENTROPY_CONTEXT *a; + ENTROPY_CONTEXT *l; + int band, rc, v; + int tmp1, tmp2; + + int seg_eob = 16; + int segment_id = xd->mode_info_context->mbmi.segment_id; + + if (segfeature_active(xd, segment_id, SEG_LVL_EOB)) { + seg_eob = get_segdata(xd, segment_id, SEG_LVL_EOB); + } + + b = xd->block; + b += 16; + + /* Chroma */ + for (block = 16; block < 24; block++, b++) { + tmp1 = vp8_block2above[block]; + tmp2 = vp8_block2left[block]; + qcoeff_ptr = b->qcoeff; + a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; + l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; + + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + for (c = 0; c < b->eob; c++) { + rc = vp8_default_zig_zag1d[c]; + band = vp8_coef_bands[c]; + v = qcoeff_ptr[rc]; + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + token = vp8_dct_value_tokens_ptr[v].Token; + + t->Token = token; + t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt]; + + t->skip_eob_node = ((pt == 0) && (band > 0)); + + ++cpi->coef_counts [2] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; + } + + if (c < seg_eob) { + band = vp8_coef_bands[c]; + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt]; + + t->skip_eob_node = ((pt == 0) && (band > 0)); + + ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } + *tp = t; + pt = (c != 0); /* 0 <-> all coeff data is zero */ + *a = *l = pt; + } +} +#endif + static void tokenize1st_order_b ( MACROBLOCKD *xd, @@ -640,7 +717,8 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { #if CONFIG_HYBRIDTRANSFORM int QIndex = cpi->mb.q_index; - int active_ht = (QIndex < ACTIVE_HT); + int active_ht = (QIndex < ACTIVE_HT) && + (x->mode_info_context->mbmi.mode == B_PRED); #endif if (!segfeature_active(x, segment_id, SEG_LVL_EOB) || @@ -717,7 +795,29 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { if(active_ht) { tokenize1st_order_ht(x, t, plane_type, cpi); } else { + +#if CONFIG_HTRANS8X8 + if (x->mode_info_context->mbmi.mode == I8X8_PRED) { + ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context; + ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context; + for (b = 0; b < 16; b += 4) { + tokenize1st_order_b_8x8(x, + x->block + b, t, PLANE_TYPE_Y_WITH_DC, + x->frame_type, + A + vp8_block2above_8x8[b], + L + vp8_block2left_8x8[b], + cpi); + *(A + vp8_block2above_8x8[b] + 1) = *(A + vp8_block2above_8x8[b]); + *(L + vp8_block2left_8x8[b] + 1) = *(L + vp8_block2left_8x8[b]); + } + tokenize1st_order_chroma(x, t, PLANE_TYPE_UV, cpi); + } else { + tokenize1st_order_b(x, t, plane_type, cpi); + } +#else tokenize1st_order_b(x, t, plane_type, cpi); +#endif + } #else tokenize1st_order_b(x, t, plane_type, cpi); -- 2.40.0