From: Yunqing Wang Date: Wed, 13 Feb 2013 08:19:32 +0000 (-0800) Subject: Rewrote fdct16x16 X-Git-Tag: v1.3.0~1151^2~170^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=048b9d41a6718e5b491dc9dd3fbaa9939fa07cb5;p=libvpx Rewrote fdct16x16 Used same algorithm as others. Change-Id: Ifdac560762aec9735cb4bb6f1dbf549e415c38a0 --- diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 746648291..230c7e986 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -1250,6 +1250,9 @@ void vp9_short_fdct16x16_c(short *input, short *out, int pitch) { } #else + +#define NEW_FDCT16 1 +#if !NEW_FDCT16 static const int16_t C1 = 16305; static const int16_t C2 = 16069; static const int16_t C3 = 15679; @@ -1469,6 +1472,137 @@ void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { } #undef RIGHT_SHIFT #undef ROUNDING + +#else +// Rewrote to use same algorithm as others. +static void fdct16_1d(int16_t input[16], int16_t output[16]) { + int16_t step[16]; + int temp1, temp2; + + // step 1 + step[ 0] = input[0] + input[15]; + step[ 1] = input[1] + input[14]; + step[ 2] = input[2] + input[13]; + step[ 3] = input[3] + input[12]; + step[ 4] = input[4] + input[11]; + step[ 5] = input[5] + input[10]; + step[ 6] = input[6] + input[ 9]; + step[ 7] = input[7] + input[ 8]; + step[ 8] = input[7] - input[ 8]; + step[ 9] = input[6] - input[ 9]; + step[10] = input[5] - input[10]; + step[11] = input[4] - input[11]; + step[12] = input[3] - input[12]; + step[13] = input[2] - input[13]; + step[14] = input[1] - input[14]; + step[15] = input[0] - input[15]; + + fdct8_1d(step, step); + + // step 2 + output[8] = step[8]; + output[9] = step[9]; + temp1 = (-step[10] + step[13]) * cospi_16_64; + temp2 = (-step[11] + step[12]) * cospi_16_64; + output[10] = dct_const_round_shift(temp1); + output[11] = dct_const_round_shift(temp2); + temp1 = (step[11] + step[12]) * cospi_16_64; + temp2 = (step[10] + step[13]) * cospi_16_64; + output[12] = dct_const_round_shift(temp1); + output[13] = dct_const_round_shift(temp2); + output[14] = step[14]; + output[15] = step[15]; + + // step 3 + step[ 8] = output[8] + output[11]; + step[ 9] = output[9] + output[10]; + step[ 10] = output[9] - output[10]; + step[ 11] = output[8] - output[11]; + step[ 12] = -output[12] + output[15]; + step[ 13] = -output[13] + output[14]; + step[ 14] = output[13] + output[14]; + step[ 15] = output[12] + output[15]; + + // step 4 + output[8] = step[8]; + temp1 = -step[9] * cospi_8_64 + step[14] * cospi_24_64; + temp2 = -step[10] * cospi_24_64 - step[13] * cospi_8_64; + output[9] = dct_const_round_shift(temp1); + output[10] = dct_const_round_shift(temp2); + output[11] = step[11]; + output[12] = step[12]; + temp1 = -step[10] * cospi_8_64 + step[13] * cospi_24_64; + temp2 = step[9] * cospi_24_64 + step[14] * cospi_8_64; + output[13] = dct_const_round_shift(temp1); + output[14] = dct_const_round_shift(temp2); + output[15] = step[15]; + + // step 5 + step[8] = output[8] + output[9]; + step[9] = output[8] - output[9]; + step[10] = -output[10] + output[11]; + step[11] = output[10] + output[11]; + step[12] = output[12] + output[13]; + step[13] = output[12] - output[13]; + step[14] = -output[14] + output[15]; + step[15] = output[14] + output[15]; + + // step 6 + output[0] = step[0]; + output[8] = step[4]; + output[4] = step[2]; + output[12] = step[6]; + output[2] = step[1]; + output[10] = step[5]; + output[6] = step[3]; + output[14] = step[7]; + + temp1 = step[8] * cospi_30_64 + step[15] * cospi_2_64; + temp2 = step[9] * cospi_14_64 + step[14] * cospi_18_64; + output[1] = dct_const_round_shift(temp1); + output[9] = dct_const_round_shift(temp2); + + temp1 = step[10] * cospi_22_64 + step[13] * cospi_10_64; + temp2 = step[11] * cospi_6_64 + step[12] * cospi_26_64; + output[5] = dct_const_round_shift(temp1); + output[13] = dct_const_round_shift(temp2); + + temp1 = -step[11] * cospi_26_64 + step[12] * cospi_6_64; + temp2 = -step[10] * cospi_10_64 + step[13] * cospi_22_64; + output[3] = dct_const_round_shift(temp1); + output[11] = dct_const_round_shift(temp2); + + temp1 = -step[9] * cospi_18_64 + step[14] * cospi_14_64; + temp2 = -step[8] * cospi_2_64 + step[15] * cospi_30_64; + output[7] = dct_const_round_shift(temp1); + output[15] = dct_const_round_shift(temp2); +} + +void vp9_short_fdct16x16_c(int16_t *input, int16_t *out, int pitch) { + int shortpitch = pitch >> 1; + int i, j; + int16_t output[256]; + int16_t temp_in[16], temp_out[16]; + + // First transform columns + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j++) + temp_in[j] = input[j * shortpitch + i]; + fdct16_1d(temp_in, temp_out); + for (j = 0; j < 16; j++) + output[j * 16 + i] = temp_out[j]; + } + + // Then transform rows + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = output[j + i * 16]; + fdct16_1d(temp_in, temp_out); + for (j = 0; j < 16; ++j) + out[j + i * 16] = temp_out[j]; + } +} +#endif #endif #define TEST_INT_32x32_DCT 1