From: Yaowu Xu Date: Fri, 22 Feb 2013 19:14:04 +0000 (-0800) Subject: optimize 8x8 fdct rounding for accuracy X-Git-Tag: v1.3.0~1151^2~142^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22012ee99416dae8640e1b72009ea9aeaa143850;p=libvpx optimize 8x8 fdct rounding for accuracy The commit added a final rounding choice for 8x8 forward dct to get rid of a sign bias at DC position and improve the accuracy in terms of round trip error for 8x8 fDCT/iDCT. This commit also enabled forward 8x8 dct test. Change-Id: Ib67f99b0a24d513e230c7812bc04569d472fdc50 --- diff --git a/test/fdct8x8_test.cc b/test/fdct8x8_test.cc index d82f7c3bd..1a3e24009 100644 --- a/test/fdct8x8_test.cc +++ b/test/fdct8x8_test.cc @@ -141,7 +141,7 @@ TEST(VP9Fdct8x8Test, ExtremalCheck) { // Initialize a test block with input range {-255, 255}. for (int j = 0; j < 64; ++j) - test_input_block[j] = rnd.Rand8() % 2 ? 255 : -255; + test_input_block[j] = rnd.Rand8() % 2 ? 255 : -256; const int pitch = 16; vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch); diff --git a/test/test.mk b/test/test.mk index 557eafd82..29657625c 100644 --- a/test/test.mk +++ b/test/test.mk @@ -72,7 +72,7 @@ endif LIBVPX_TEST_SRCS-$(CONFIG_VP9) += convolve_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc -#LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc +LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc #LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc diff --git a/vp9/encoder/vp9_dct.c b/vp9/encoder/vp9_dct.c index 4a1e78e93..a459e949b 100644 --- a/vp9/encoder/vp9_dct.c +++ b/vp9/encoder/vp9_dct.c @@ -323,247 +323,6 @@ static const int16_t adst_i16[256] = { }; #endif -#define NEW_FDCT8x8 1 -#if !NEW_FDCT8x8 -static const int xC1S7 = 16069; -static const int xC2S6 = 15137; -static const int xC3S5 = 13623; -static const int xC4S4 = 11585; 
-static const int xC5S3 = 9102; -static const int xC6S2 = 6270; -static const int xC7S1 = 3196; - -#define SHIFT_BITS 14 -#define DOROUND(X) X += (1<<(SHIFT_BITS-1)); - -#define FINAL_SHIFT 3 -#define FINAL_ROUNDING (1<<(FINAL_SHIFT -1)) -#define IN_SHIFT (FINAL_SHIFT+1) - - -void vp9_short_fdct8x8_c(short *InputData, short *OutputData, int pitch) { - int loop; - int short_pitch = pitch >> 1; - int is07, is12, is34, is56; - int is0734, is1256; - int id07, id12, id34, id56; - int irot_input_x, irot_input_y; - int icommon_product1; // Re-used product (c4s4 * (s12 - s56)) - int icommon_product2; // Re-used product (c4s4 * (d12 + d56)) - int temp1, temp2; // intermediate variable for computation - - int InterData[64]; - int *ip = InterData; - short *op = OutputData; - - for (loop = 0; loop < 8; loop++) { - // Pre calculate some common sums and differences. - is07 = (InputData[0] + InputData[7]) << IN_SHIFT; - is12 = (InputData[1] + InputData[2]) << IN_SHIFT; - is34 = (InputData[3] + InputData[4]) << IN_SHIFT; - is56 = (InputData[5] + InputData[6]) << IN_SHIFT; - id07 = (InputData[0] - InputData[7]) << IN_SHIFT; - id12 = (InputData[1] - InputData[2]) << IN_SHIFT; - id34 = (InputData[3] - InputData[4]) << IN_SHIFT; - id56 = (InputData[5] - InputData[6]) << IN_SHIFT; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - // Pre-Calculate some common product terms. - icommon_product1 = xC4S4 * (is12 - is56); - DOROUND(icommon_product1) - icommon_product1 >>= SHIFT_BITS; - - icommon_product2 = xC4S4 * (id12 + id56); - DOROUND(icommon_product2) - icommon_product2 >>= SHIFT_BITS; - - - ip[0] = (xC4S4 * (is0734 + is1256)); - DOROUND(ip[0]); - ip[0] >>= SHIFT_BITS; - - ip[4] = (xC4S4 * (is0734 - is1256)); - DOROUND(ip[4]); - ip[4] >>= SHIFT_BITS; - - // Define inputs to rotation for outputs 2 and 6 - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - // Apply rotation for outputs 2 and 6. 
- temp1 = xC6S2 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[2] = temp1 + temp2; - - temp1 = xC6S2 * irot_input_y; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_x; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[6] = temp1 - temp2; - - // Define inputs to rotation for outputs 1 and 7 - irot_input_x = icommon_product1 + id07; - irot_input_y = -(id34 + icommon_product2); - - // Apply rotation for outputs 1 and 7. - temp1 = xC1S7 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC7S1 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[1] = temp1 - temp2; - - temp1 = xC7S1 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC1S7 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[7] = temp1 + temp2; - - // Define inputs to rotation for outputs 3 and 5 - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - // Apply rotation for outputs 3 and 5. - temp1 = xC3S5 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC5S3 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[3] = temp1 - temp2; - - - temp1 = xC5S3 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC3S5 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - ip[5] = temp1 + temp2; - - // Increment data pointer for next row - InputData += short_pitch; - ip += 8; - } - - // Performed DCT on rows, now transform the columns - ip = InterData; - for (loop = 0; loop < 8; loop++) { - // Pre calculate some common sums and differences. 
- is07 = ip[0 * 8] + ip[7 * 8]; - is12 = ip[1 * 8] + ip[2 * 8]; - is34 = ip[3 * 8] + ip[4 * 8]; - is56 = ip[5 * 8] + ip[6 * 8]; - - id07 = ip[0 * 8] - ip[7 * 8]; - id12 = ip[1 * 8] - ip[2 * 8]; - id34 = ip[3 * 8] - ip[4 * 8]; - id56 = ip[5 * 8] - ip[6 * 8]; - - is0734 = is07 + is34; - is1256 = is12 + is56; - - // Pre-Calculate some common product terms - icommon_product1 = xC4S4 * (is12 - is56); - icommon_product2 = xC4S4 * (id12 + id56); - DOROUND(icommon_product1) - DOROUND(icommon_product2) - icommon_product1 >>= SHIFT_BITS; - icommon_product2 >>= SHIFT_BITS; - - - temp1 = xC4S4 * (is0734 + is1256); - temp2 = xC4S4 * (is0734 - is1256); - DOROUND(temp1); - DOROUND(temp2); - temp1 >>= SHIFT_BITS; - - temp2 >>= SHIFT_BITS; - op[0 * 8] = (temp1 + FINAL_ROUNDING) >> FINAL_SHIFT; - op[4 * 8] = (temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 2 and 6 - irot_input_x = id12 - id56; - irot_input_y = is07 - is34; - - // Apply rotation for outputs 2 and 6. - temp1 = xC6S2 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[2 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - temp1 = xC6S2 * irot_input_y; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC2S6 * irot_input_x; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[6 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 1 and 7 - irot_input_x = icommon_product1 + id07; - irot_input_y = -(id34 + icommon_product2); - - // Apply rotation for outputs 1 and 7. 
- temp1 = xC1S7 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC7S1 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[1 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - temp1 = xC7S1 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC1S7 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[7 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Define inputs to rotation for outputs 3 and 5 - irot_input_x = id07 - icommon_product1; - irot_input_y = id34 - icommon_product2; - - // Apply rotation for outputs 3 and 5. - temp1 = xC3S5 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC5S3 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[3 * 8] = (temp1 - temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - - temp1 = xC5S3 * irot_input_x; - DOROUND(temp1); - temp1 >>= SHIFT_BITS; - temp2 = xC3S5 * irot_input_y; - DOROUND(temp2); - temp2 >>= SHIFT_BITS; - op[5 * 8] = (temp1 + temp2 + FINAL_ROUNDING) >> FINAL_SHIFT; - - // Increment data pointer for next column. - ip++; - op++; - } -} -#endif - /* For test */ #define TEST_INT 1 #if TEST_INT @@ -918,7 +677,6 @@ void vp9_short_fdct8x4_c(short *input, short *output, int pitch) vp9_short_fdct4x4_c(input + 4, output + 16, pitch); } -#if NEW_FDCT8x8 static void fdct8_1d(int16_t *input, int16_t *output) { int16_t step[8]; int temp1, temp2; @@ -986,10 +744,9 @@ void vp9_short_fdct8x8_c(int16_t *input, int16_t *output, int pitch) { temp_in[j] = out[j + i * 8]; fdct8_1d(temp_in, temp_out); for (j = 0; j < 8; ++j) - output[j + i * 8] = temp_out[j] >> 1; + output[j + i * 8] = temp_out[j] / 2; } } -#endif #if CONFIG_INTHT static void fadst8_1d(int16_t *input, int16_t *output) {