From 66f440f1ee6c993eff908da9c75cc2ae9de08775 Mon Sep 17 00:00:00 2001 From: Jingning Han Date: Mon, 6 Aug 2012 14:48:11 -0700 Subject: [PATCH] Refactoring hybrid transform coding The forward and inverse hybrid transforms are now performed using single function modules, where the dimension is sent as argument. Added an inline function clip8b to clip the reconstruction pixels into range of 0-255. Change-Id: Id7d870b3e1aefc092721c80c0af6f641eb5f3747 --- vp8/common/idct.h | 5 +- vp8/common/idctllm.c | 168 ++++++++----------------------------- vp8/common/invtrans.c | 2 +- vp8/decoder/decodframe.c | 1 - vp8/decoder/dequantize.c | 11 +-- vp8/encoder/dct.c | 169 +++++++------------------------------- vp8/encoder/dct.h | 6 +- vp8/encoder/encodeintra.c | 16 ++-- vp8/encoder/rdopt.c | 17 ++-- 9 files changed, 87 insertions(+), 308 deletions(-) diff --git a/vp8/common/idct.h b/vp8/common/idct.h index 5336f5ab0..2a410c34e 100644 --- a/vp8/common/idct.h +++ b/vp8/common/idct.h @@ -111,9 +111,10 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_lossless_c); extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c); #endif -#if CONFIG_HYBRIDTRANSFORM +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM #include "vp8/common/blockd.h" -void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type); +void vp8_ihtllm_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim); #endif diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c index 616e4938e..5c7bf78d2 100644 --- a/vp8/common/idctllm.c +++ b/vp8/common/idctllm.c @@ -93,120 +93,17 @@ float iadst_8[64] = { }; #endif -#if CONFIG_HYBRIDTRANSFORM -void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { - int i, j, k; - float bufa[16], bufb[16]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - short *ip = input; - short *op = output; - int shortpitch = pitch >> 1; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - float *ptv, *pth; - - // load and convert residual array into floating-point - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = (float)ip[i]; - } - pfa += 4; - ip += 4; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = &iadst_4[0]; - break; - - default : - ptv = &idct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfb[i] = 0 ; - for(k = 0; k < 4; k++) { - pfb[i] += ptv[k] * pfa[(k<<2)]; - } - pfa += 1; - } - - pfb += 4; - ptv += 4; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &iadst_4[0]; - break; - - default : - pth = &idct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = 0; - for(k = 0; k < 4; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += 4; - } - - pfa += 4; - pfb += 4; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &iadst_4[0]; - break; - - default : - pth = &idct_4[0]; - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output; - pfa = &bufa[0]; - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) : - -(short)( - pfa[i] / 8 + 0.49); - } - op += shortpitch; - pfa += 4; - } -} -#endif - -#if CONFIG_HYBRIDTRANSFORM8X8 -void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_ihtllm_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim) { int i, j, k; float bufa[64], bufb[64]; // buffers are for floating-point test purpose // the implementation could be simplified in // conjunction with integer transform + + // further notice, since we are thinking to use one + // function for both 4x4 and 8x8 transforms, the + // temporary buffers are simply initialized with 64. short *ip = input; short *op = output; int shortpitch = pitch >> 1; @@ -218,12 +115,12 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { float *ptv, *pth; // load and convert residual array into floating-point - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = (float)ip[i]; } - pfa += 8; - ip += 8; + pfa += tx_dim; + ip += tx_dim; } // vertical transformation @@ -233,25 +130,25 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = &iadst_8[0]; + ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - ptv = &idct_8[0]; + ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfb[i] = 0 ; - for(k = 0; k < 8; k++) { - pfb[i] += ptv[k] * pfa[(k<<3)]; + for(k = 0; k < tx_dim; k++) { + pfb[i] += ptv[k] * pfa[(k * tx_dim)]; } pfa += 1; } - pfb += 8; - ptv += 8; + pfb += tx_dim; + ptv += tx_dim; pfa = &bufa[0]; } @@ -262,34 +159,34 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - pth = &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = 0; - for(k = 0; k < 8; k++) { + for(k = 0; k < tx_dim; k++) { pfa[i] += pfb[k] * pth[k]; } - pth += 8; + pth += tx_dim; } - pfa += 8; - pfb += 8; + pfa += tx_dim; + pfb += tx_dim; switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &iadst_8[0]; + pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0]; break; default : - pth = &idct_8[0]; + pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0]; break; } } @@ -298,13 +195,14 @@ void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) { op = output; pfa = &bufa[0]; - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) : -(short)( - pfa[i] / 8 + 0.49); } + op += shortpitch; - pfa += 8; + pfa += tx_dim; } } #endif diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c index de9aad58d..1357839c7 100644 --- a/vp8/common/invtrans.c +++ b/vp8/common/invtrans.c @@ -33,7 +33,7 @@ static void recon_dcblock_8x8(MACROBLOCKD *x) { #if CONFIG_HYBRIDTRANSFORM void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) { - vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type); + vp8_ihtllm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type, 4); } #endif diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c index 057104f49..59f453edf 100644 --- a/vp8/decoder/decodframe.c +++ b/vp8/decoder/decodframe.c @@ -392,7 +392,6 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, txfm_map(b, pred_mode_conv(i8x8mode)); vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type, q, dq, pre, dst, 16, stride); - // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride); q += 64; #else for (j = 0; j < 4; j++) { diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c index bf44fd61a..6164c44d5 100644 --- a/vp8/decoder/dequantize.c +++ b/vp8/decoder/dequantize.c @@ -55,7 +55,7 @@ void vp8_ht_dequant_idct_add_c(TX_TYPE tx_type, short *input, short *dq, input[i] = dq[i] * input[i]; } - vp8_iht4x4llm_c( input, output, 4 << 1, tx_type ); + vp8_ihtllm_c(input, output, 4 << 1, tx_type, 4); vpx_memset(input, 0, 32); @@ -95,7 +95,7 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, input[i] = dq[1] * input[i]; } - vp8_iht8x8llm_c(input, output, 16, tx_type); + vp8_ihtllm_c(input, output, 16, tx_type, 8); vpx_memset(input, 0, 128); @@ -117,9 +117,10 @@ void vp8_ht_dequant_idct_add_8x8_c(TX_TYPE tx_type, short *input, short *dq, diff_ptr += 8; pred += pitch; } - diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4; - dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4; - pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4; + // shift buffer pointers to next 4x4 block in the submacroblock + diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4; + dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4; + pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4; } } #endif diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c index ad5258552..ae1912903 100644 --- a/vp8/encoder/dct.c +++ b/vp8/encoder/dct.c @@ -329,114 +329,9 @@ void vp8_short_fhaar2x2_c(short *input, short *output, int pitch) { // pitch = 8 } -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) { - int i, j, k; - float bufa[16], bufb[16]; // buffers are for floating-point test purpose - // the implementation could be simplified in - // conjunction with integer transform - short *ip = input; - short *op = output; - - float *pfa = &bufa[0]; - float *pfb = &bufb[0]; - - // pointers to vertical and horizontal transforms - float *ptv, *pth; - - // load and convert residual array into floating-point - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = (float)ip[i]; - } - pfa += 4; - ip += pitch / 2; - } - - // vertical transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case ADST_DCT : - ptv = &adst_4[0]; - break; - - default : - ptv = &dct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfb[i] = 0; - for(k = 0; k < 4; k++) { - pfb[i] += ptv[k] * pfa[(k<<2)]; - } - pfa += 1; - } - pfb += 4; - ptv += 4; - pfa = &bufa[0]; - } - - // horizontal transformation - pfa = &bufa[0]; - pfb = &bufb[0]; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &adst_4[0]; - break; - - default : - pth = &dct_4[0]; - break; - } - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - pfa[i] = 0; - for(k = 0; k < 4; k++) { - pfa[i] += pfb[k] * pth[k]; - } - pth += 4; - } - - pfa += 4; - pfb += 4; - - switch(tx_type) { - case ADST_ADST : - case DCT_ADST : - pth = &adst_4[0]; - break; - - default : - pth = &dct_4[0]; - break; - } - } - - // convert to short integer format and load BLOCKD buffer - op = output ; - pfa = &bufa[0] ; - - for(j = 0; j < 4; j++) { - for(i = 0; i < 4; i++) { - op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) : - -(short)(- 8 * pfa[i] + 0.49); - } - op += 4; - pfa += 4; - } -} -#endif - -#if CONFIG_HYBRIDTRANSFORM8X8 -void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_fht_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim) { int i, j, k; float bufa[64], bufb[64]; // buffers are for floating-point test purpose // the implementation could be simplified in @@ -451,11 +346,11 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { float *ptv, *pth; // load and convert residual array into floating-point - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = (float)ip[i]; } - pfa += 8; + pfa += tx_dim; ip += pitch / 2; } @@ -466,24 +361,24 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case ADST_DCT : - ptv = &adst_8[0]; + ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - ptv = &dct_8[0]; + ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfb[i] = 0; - for(k = 0; k < 8; k++) { - pfb[i] += ptv[k] * pfa[(k<<3)]; + for(k = 0; k < tx_dim; k++) { + pfb[i] += ptv[k] * pfa[(k * tx_dim)]; } pfa += 1; } - pfb += 8; - ptv += 8; + pfb += tx_dim; + ptv += tx_dim; pfa = &bufa[0]; } @@ -494,34 +389,34 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &adst_8[0]; + pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - pth = &dct_8[0]; + pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { pfa[i] = 0; - for(k = 0; k < 8; k++) { + for(k = 0; k < tx_dim; k++) { pfa[i] += pfb[k] * pth[k]; } - pth += 8; + pth += tx_dim; } - pfa += 8; - pfb += 8; + pfa += tx_dim; + pfb += tx_dim; switch(tx_type) { case ADST_ADST : case DCT_ADST : - pth = &adst_8[0]; + pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0]; break; default : - pth = &dct_8[0]; + pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0]; break; } } @@ -530,13 +425,13 @@ void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) { op = output ; pfa = &bufa[0] ; - for(j = 0; j < 8; j++) { - for(i = 0; i < 8; i++) { + for(j = 0; j < tx_dim; j++) { + for(i = 0; i < tx_dim; i++) { op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) : -(short)(- 8 * pfa[i] + 0.49); } - op += 8; - pfa += 8; + op += tx_dim; + pfa += tx_dim; } } #endif @@ -582,14 +477,6 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) { } } -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht8x4_c(short *input, short *output, int pitch, - TX_TYPE tx_type) { - vp8_fht4x4_c(input, output, pitch, tx_type); - vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type); -} -#endif - void vp8_short_fdct8x4_c(short *input, short *output, int pitch) { vp8_short_fdct4x4_c(input, output, pitch); diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h index 9936969d5..2d7b61754 100644 --- a/vp8/encoder/dct.h +++ b/vp8/encoder/dct.h @@ -23,9 +23,9 @@ #endif -#if CONFIG_HYBRIDTRANSFORM -void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type); -void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type); +#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM +void vp8_fht_c(short *input, short *output, int pitch, + TX_TYPE tx_type, int tx_dim); #endif #if CONFIG_TX16X16 diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c index 964046d92..c40490163 100644 --- a/vp8/encoder/encodeintra.c +++ b/vp8/encoder/encodeintra.c @@ -91,8 +91,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, if(active_ht) { b->bmi.as_mode.test = b->bmi.as_mode.first; txfm_map(b, b->bmi.as_mode.first); - - vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4); vp8_ht_quantize_b(be, b); vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ; } else { @@ -317,16 +316,11 @@ void vp8_encode_intra8x8(const VP8_ENCODER_RTCD *rtcd, vp8_subtract_4b_c(be, b, 16); txfm_map(b, pred_mode_conv(b->bmi.as_mode.first)); - - vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, - b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, + b->bmi.as_mode.tx_type, 8); x->quantize_b_8x8(x->block + idx, xd->block + idx); - vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, - b->bmi.as_mode.tx_type); - -// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); -// x->quantize_b_8x8(x->block + idx, xd->block + idx); -// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32); + vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32, + b->bmi.as_mode.tx_type, 8); // reconstruct submacroblock for (i = 0; i < 4; i++) { diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 3f2b8e85c..67bf33d6f 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -612,20 +612,20 @@ static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) { switch (b->bmi.as_mode.tx_type) { case ADST_DCT: - pt_scan = vp8_row_scan; + scan = vp8_row_scan; break; case DCT_ADST: - pt_scan = vp8_col_scan; + scan = vp8_col_scan; break; default: - pt_scan = vp8_default_zig_zag1d; + scan = vp8_default_zig_zag1d; break; } } else - pt_scan = vp8_default_zig_zag1d; + scan = vp8_default_zig_zag1d; } #endif break; @@ -937,8 +937,7 @@ static int64_t rd_pick_intra4x4block( if(active_ht) { b->bmi.as_mode.test = mode; txfm_map(b, mode); - - vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type); + vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4); vp8_ht_quantize_b(be, b); } else { x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32); @@ -991,7 +990,7 @@ static int64_t rd_pick_intra4x4block( // inverse transform if(active_ht) { - vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type ); + vp8_ihtllm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4); } else { IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff, b->diff, 32); @@ -1230,8 +1229,8 @@ static int64_t rd_pick_intra8x8block( #if CONFIG_HYBRIDTRANSFORM8X8 txfm_map(b, pred_mode_conv(mode)); - vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type); -// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32); + vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32, + b->bmi.as_mode.tx_type, 8); x->quantize_b_8x8(x->block + idx, xd->block + idx); // compute quantization mse of 8x8 block -- 2.40.0