extern prototype_second_order(vp8_short_inv_walsh4x4_1_lossless_c);
#endif
-#if CONFIG_HYBRIDTRANSFORM
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
#include "vp8/common/blockd.h"
-void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+void vp8_ihtllm_c(short *input, short *output, int pitch,
+ TX_TYPE tx_type, int tx_dim);
#endif
};
#endif
-#if CONFIG_HYBRIDTRANSFORM
-void vp8_iht4x4llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
- int i, j, k;
- float bufa[16], bufb[16]; // buffers are for floating-point test purpose
- // the implementation could be simplified in
- // conjunction with integer transform
- short *ip = input;
- short *op = output;
- int shortpitch = pitch >> 1;
-
- float *pfa = &bufa[0];
- float *pfb = &bufb[0];
-
- // pointers to vertical and horizontal transforms
- float *ptv, *pth;
-
- // load and convert residual array into floating-point
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfa[i] = (float)ip[i];
- }
- pfa += 4;
- ip += 4;
- }
-
- // vertical transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case ADST_DCT :
- ptv = &iadst_4[0];
- break;
-
- default :
- ptv = &idct_4[0];
- break;
- }
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfb[i] = 0 ;
- for(k = 0; k < 4; k++) {
- pfb[i] += ptv[k] * pfa[(k<<2)];
- }
- pfa += 1;
- }
-
- pfb += 4;
- ptv += 4;
- pfa = &bufa[0];
- }
-
- // horizontal transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = &iadst_4[0];
- break;
-
- default :
- pth = &idct_4[0];
- break;
- }
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfa[i] = 0;
- for(k = 0; k < 4; k++) {
- pfa[i] += pfb[k] * pth[k];
- }
- pth += 4;
- }
-
- pfa += 4;
- pfb += 4;
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = &iadst_4[0];
- break;
-
- default :
- pth = &idct_4[0];
- break;
- }
- }
-
- // convert to short integer format and load BLOCKD buffer
- op = output;
- pfa = &bufa[0];
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :
- -(short)( - pfa[i] / 8 + 0.49);
- }
- op += shortpitch;
- pfa += 4;
- }
-}
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM8X8
-void vp8_iht8x8llm_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+void vp8_ihtllm_c(short *input, short *output, int pitch,
+ TX_TYPE tx_type, int tx_dim) {
int i, j, k;
float bufa[64], bufb[64]; // buffers are for floating-point test purpose
// the implementation could be simplified in
// conjunction with integer transform
+
+ // Note: this single function serves both the 4x4 and
+ // 8x8 transforms, so the temporary buffers are sized
+ // for the larger case (64 entries).
short *ip = input;
short *op = output;
int shortpitch = pitch >> 1;
float *ptv, *pth;
// load and convert residual array into floating-point
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfa[i] = (float)ip[i];
}
- pfa += 8;
- ip += 8;
+ pfa += tx_dim;
+ ip += tx_dim;
}
// vertical transformation
switch(tx_type) {
case ADST_ADST :
case ADST_DCT :
- ptv = &iadst_8[0];
+ ptv = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
- ptv = &idct_8[0];
+ ptv = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfb[i] = 0 ;
- for(k = 0; k < 8; k++) {
- pfb[i] += ptv[k] * pfa[(k<<3)];
+ for(k = 0; k < tx_dim; k++) {
+ pfb[i] += ptv[k] * pfa[(k * tx_dim)];
}
pfa += 1;
}
- pfb += 8;
- ptv += 8;
+ pfb += tx_dim;
+ ptv += tx_dim;
pfa = &bufa[0];
}
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
- pth = &iadst_8[0];
+ pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
- pth = &idct_8[0];
+ pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfa[i] = 0;
- for(k = 0; k < 8; k++) {
+ for(k = 0; k < tx_dim; k++) {
pfa[i] += pfb[k] * pth[k];
}
- pth += 8;
+ pth += tx_dim;
}
- pfa += 8;
- pfb += 8;
+ pfa += tx_dim;
+ pfb += tx_dim;
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
- pth = &iadst_8[0];
+ pth = (tx_dim == 4) ? &iadst_4[0] : &iadst_8[0];
break;
default :
- pth = &idct_8[0];
+ pth = (tx_dim == 4) ? &idct_4[0] : &idct_8[0];
break;
}
}
op = output;
pfa = &bufa[0];
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
op[i] = (pfa[i] > 0 ) ? (short)( pfa[i] / 8 + 0.49) :
-(short)( - pfa[i] / 8 + 0.49);
}
+
op += shortpitch;
- pfa += 8;
+ pfa += tx_dim;
}
}
#endif
#if CONFIG_HYBRIDTRANSFORM
void vp8_inverse_htransform_b(const vp8_idct_rtcd_vtable_t *rtcd, BLOCKD *b, int pitch) {
- vp8_iht4x4llm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type);
+ vp8_ihtllm_c(b->dqcoeff, b->diff, pitch, b->bmi.as_mode.tx_type, 4);
}
#endif
txfm_map(b, pred_mode_conv(i8x8mode));
vp8_ht_dequant_idct_add_8x8_c(b->bmi.as_mode.tx_type,
q, dq, pre, dst, 16, stride);
- // vp8_dequant_idct_add_8x8_c(q, dq, pre, dst, 16, stride);
q += 64;
#else
for (j = 0; j < 4; j++) {
input[i] = dq[i] * input[i];
}
- vp8_iht4x4llm_c( input, output, 4 << 1, tx_type );
+ vp8_ihtllm_c(input, output, 4 << 1, tx_type, 4);
vpx_memset(input, 0, 32);
input[i] = dq[1] * input[i];
}
- vp8_iht8x8llm_c(input, output, 16, tx_type);
+ vp8_ihtllm_c(input, output, 16, tx_type, 8);
vpx_memset(input, 0, 128);
diff_ptr += 8;
pred += pitch;
}
- diff_ptr = output + (b + 1) / 2 * 4 * 8 + (b + 1) % 2 * 4;
- dest = origdest + (b + 1) / 2 * 4 * stride + (b + 1) % 2 * 4;
- pred = origpred + (b + 1) / 2 * 4 * pitch + (b + 1) % 2 * 4;
+ // shift buffer pointers to next 4x4 block in the submacroblock
+ diff_ptr = output + (b + 1) / 2 * 4 * 8 + ((b + 1) % 2) * 4;
+ dest = origdest + (b + 1) / 2 * 4 * stride + ((b + 1) % 2) * 4;
+ pred = origpred + (b + 1) / 2 * 4 * pitch + ((b + 1) % 2) * 4;
}
}
#endif
}
-#if CONFIG_HYBRIDTRANSFORM
-void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
- int i, j, k;
- float bufa[16], bufb[16]; // buffers are for floating-point test purpose
- // the implementation could be simplified in
- // conjunction with integer transform
- short *ip = input;
- short *op = output;
-
- float *pfa = &bufa[0];
- float *pfb = &bufb[0];
-
- // pointers to vertical and horizontal transforms
- float *ptv, *pth;
-
- // load and convert residual array into floating-point
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfa[i] = (float)ip[i];
- }
- pfa += 4;
- ip += pitch / 2;
- }
-
- // vertical transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case ADST_DCT :
- ptv = &adst_4[0];
- break;
-
- default :
- ptv = &dct_4[0];
- break;
- }
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfb[i] = 0;
- for(k = 0; k < 4; k++) {
- pfb[i] += ptv[k] * pfa[(k<<2)];
- }
- pfa += 1;
- }
- pfb += 4;
- ptv += 4;
- pfa = &bufa[0];
- }
-
- // horizontal transformation
- pfa = &bufa[0];
- pfb = &bufb[0];
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = &adst_4[0];
- break;
-
- default :
- pth = &dct_4[0];
- break;
- }
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- pfa[i] = 0;
- for(k = 0; k < 4; k++) {
- pfa[i] += pfb[k] * pth[k];
- }
- pth += 4;
- }
-
- pfa += 4;
- pfb += 4;
-
- switch(tx_type) {
- case ADST_ADST :
- case DCT_ADST :
- pth = &adst_4[0];
- break;
-
- default :
- pth = &dct_4[0];
- break;
- }
- }
-
- // convert to short integer format and load BLOCKD buffer
- op = output ;
- pfa = &bufa[0] ;
-
- for(j = 0; j < 4; j++) {
- for(i = 0; i < 4; i++) {
- op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
- -(short)(- 8 * pfa[i] + 0.49);
- }
- op += 4;
- pfa += 4;
- }
-}
-#endif
-
-#if CONFIG_HYBRIDTRANSFORM8X8
-void vp8_fht8x8_c(short *input, short *output, int pitch, TX_TYPE tx_type) {
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+void vp8_fht_c(short *input, short *output, int pitch,
+ TX_TYPE tx_type, int tx_dim) {
int i, j, k;
float bufa[64], bufb[64]; // buffers are for floating-point test purpose
// the implementation could be simplified in
float *ptv, *pth;
// load and convert residual array into floating-point
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfa[i] = (float)ip[i];
}
- pfa += 8;
+ pfa += tx_dim;
ip += pitch / 2;
}
switch(tx_type) {
case ADST_ADST :
case ADST_DCT :
- ptv = &adst_8[0];
+ ptv = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
- ptv = &dct_8[0];
+ ptv = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfb[i] = 0;
- for(k = 0; k < 8; k++) {
- pfb[i] += ptv[k] * pfa[(k<<3)];
+ for(k = 0; k < tx_dim; k++) {
+ pfb[i] += ptv[k] * pfa[(k * tx_dim)];
}
pfa += 1;
}
- pfb += 8;
- ptv += 8;
+ pfb += tx_dim;
+ ptv += tx_dim;
pfa = &bufa[0];
}
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
- pth = &adst_8[0];
+ pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
- pth = &dct_8[0];
+ pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
pfa[i] = 0;
- for(k = 0; k < 8; k++) {
+ for(k = 0; k < tx_dim; k++) {
pfa[i] += pfb[k] * pth[k];
}
- pth += 8;
+ pth += tx_dim;
}
- pfa += 8;
- pfb += 8;
+ pfa += tx_dim;
+ pfb += tx_dim;
switch(tx_type) {
case ADST_ADST :
case DCT_ADST :
- pth = &adst_8[0];
+ pth = (tx_dim == 4) ? &adst_4[0] : &adst_8[0];
break;
default :
- pth = &dct_8[0];
+ pth = (tx_dim == 4) ? &dct_4[0] : &dct_8[0];
break;
}
}
op = output ;
pfa = &bufa[0] ;
- for(j = 0; j < 8; j++) {
- for(i = 0; i < 8; i++) {
+ for(j = 0; j < tx_dim; j++) {
+ for(i = 0; i < tx_dim; i++) {
op[i] = (pfa[i] > 0 ) ? (short)( 8 * pfa[i] + 0.49) :
-(short)(- 8 * pfa[i] + 0.49);
}
- op += 8;
- pfa += 8;
+ op += tx_dim;
+ pfa += tx_dim;
}
}
#endif
}
}
-#if CONFIG_HYBRIDTRANSFORM
-void vp8_fht8x4_c(short *input, short *output, int pitch,
- TX_TYPE tx_type) {
- vp8_fht4x4_c(input, output, pitch, tx_type);
- vp8_fht4x4_c(input + 4, output + 16, pitch, tx_type);
-}
-#endif
-
void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
{
vp8_short_fdct4x4_c(input, output, pitch);
#endif
-#if CONFIG_HYBRIDTRANSFORM
-void vp8_fht4x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
-void vp8_fht8x4_c(short *input, short *output, int pitch, TX_TYPE tx_type);
+#if CONFIG_HYBRIDTRANSFORM8X8 || CONFIG_HYBRIDTRANSFORM
+void vp8_fht_c(short *input, short *output, int pitch,
+ TX_TYPE tx_type, int tx_dim);
#endif
#if CONFIG_TX16X16
if(active_ht) {
b->bmi.as_mode.test = b->bmi.as_mode.first;
txfm_map(b, b->bmi.as_mode.first);
-
- vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+ vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);
vp8_ht_quantize_b(be, b);
vp8_inverse_htransform_b(IF_RTCD(&rtcd->common->idct), b, 32) ;
} else {
vp8_subtract_4b_c(be, b, 16);
txfm_map(b, pred_mode_conv(b->bmi.as_mode.first));
-
- vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32,
- b->bmi.as_mode.tx_type);
+ vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
+ b->bmi.as_mode.tx_type, 8);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
- vp8_iht8x8llm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
- b->bmi.as_mode.tx_type);
-
-// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
-// x->quantize_b_8x8(x->block + idx, xd->block + idx);
-// vp8_short_idct8x8_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32);
+ vp8_ihtllm_c(xd->block[idx].dqcoeff, xd->block[ib].diff, 32,
+ b->bmi.as_mode.tx_type, 8);
// reconstruct submacroblock
for (i = 0; i < 4; i++) {
if((type == PLANE_TYPE_Y_WITH_DC) && active_ht) {
switch (b->bmi.as_mode.tx_type) {
case ADST_DCT:
- pt_scan = vp8_row_scan;
+ scan = vp8_row_scan;
break;
case DCT_ADST:
- pt_scan = vp8_col_scan;
+ scan = vp8_col_scan;
break;
default:
- pt_scan = vp8_default_zig_zag1d;
+ scan = vp8_default_zig_zag1d;
break;
}
} else
- pt_scan = vp8_default_zig_zag1d;
+ scan = vp8_default_zig_zag1d;
}
#endif
break;
if(active_ht) {
b->bmi.as_mode.test = mode;
txfm_map(b, mode);
-
- vp8_fht4x4_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type);
+ vp8_fht_c(be->src_diff, be->coeff, 32, b->bmi.as_mode.tx_type, 4);
vp8_ht_quantize_b(be, b);
} else {
x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
// inverse transform
if(active_ht) {
- vp8_iht4x4llm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type );
+ vp8_ihtllm_c(best_dqcoeff, b->diff, 32, b->bmi.as_mode.tx_type, 4);
} else {
IDCT_INVOKE(IF_RTCD(&cpi->rtcd.common->idct), idct16)(best_dqcoeff,
b->diff, 32);
#if CONFIG_HYBRIDTRANSFORM8X8
txfm_map(b, pred_mode_conv(mode));
- vp8_fht8x8_c(be->src_diff, (x->block + idx)->coeff, 32, b->bmi.as_mode.tx_type);
-// x->vp8_short_fdct8x8(be->src_diff, (x->block + idx)->coeff, 32);
+ vp8_fht_c(be->src_diff, (x->block + idx)->coeff, 32,
+ b->bmi.as_mode.tx_type, 8);
x->quantize_b_8x8(x->block + idx, xd->block + idx);
// compute quantization mse of 8x8 block