From cc57c5e4afdb87bce648e20565f24726019da95a Mon Sep 17 00:00:00 2001 From: Deb Mukherjee Date: Thu, 6 Nov 2014 12:50:33 -0800 Subject: [PATCH] Iadst transforms to use internal low precision Change-Id: I266777d40c300bc53b45b205144520b85b0d6e58 (cherry picked from commit a1b726117f5470f227bc90cd030b7d25045dc510) --- vp9/common/vp9_idct.c | 104 +++++++++++++++++++----------------------- 1 file changed, 47 insertions(+), 57 deletions(-) diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index cc261abe7..b2c78c790 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -276,10 +276,10 @@ void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { static void iadst4(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - tran_high_t x0 = input[0]; - tran_high_t x1 = input[1]; - tran_high_t x2 = input[2]; - tran_high_t x3 = input[3]; + tran_low_t x0 = input[0]; + tran_low_t x1 = input[1]; + tran_low_t x2 = input[2]; + tran_low_t x3 = input[3]; if (!(x0 | x1 | x2 | x3)) { output[0] = output[1] = output[2] = output[3] = 0; @@ -295,24 +295,19 @@ static void iadst4(const tran_low_t *input, tran_low_t *output) { s6 = sinpi_4_9 * x3; s7 = x0 - x2 + x3; - x0 = s0 + s3 + s5; - x1 = s1 - s4 - s6; - x2 = sinpi_3_9 * s7; - x3 = s2; - - s0 = x0 + x3; - s1 = x1 + x3; - s2 = x2; - s3 = x0 + x1 - x3; + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = sinpi_3_9 * s7; // 1-D transform scaling factor is sqrt(2). // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. - output[0] = WRAPLOW(dct_const_round_shift(s0), 8); - output[1] = WRAPLOW(dct_const_round_shift(s1), 8); + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8); + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8); output[2] = WRAPLOW(dct_const_round_shift(s2), 8); - output[3] = WRAPLOW(dct_const_round_shift(s3), 8); + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8); } void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, @@ -1702,10 +1697,10 @@ void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - tran_high_t x0 = input[0]; - tran_high_t x1 = input[1]; - tran_high_t x2 = input[2]; - tran_high_t x3 = input[3]; + tran_low_t x0 = input[0]; + tran_low_t x1 = input[1]; + tran_low_t x2 = input[2]; + tran_low_t x3 = input[3]; (void) bd; if (!(x0 | x1 | x2 | x3)) { @@ -1720,26 +1715,21 @@ static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { s4 = sinpi_1_9 * x2; s5 = sinpi_2_9 * x3; s6 = sinpi_4_9 * x3; - s7 = x0 - x2 + x3; - - x0 = s0 + s3 + s5; - x1 = s1 - s4 - s6; - x2 = sinpi_3_9 * s7; - x3 = s2; + s7 = (tran_high_t)(x0 - x2 + x3); - s0 = x0 + x3; - s1 = x1 + x3; - s2 = x2; - s3 = x0 + x1 - x3; + s0 = s0 + s3 + s5; + s1 = s1 - s4 - s6; + s3 = s2; + s2 = sinpi_3_9 * s7; // 1-D transform scaling factor is sqrt(2). // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. - output[0] = WRAPLOW(dct_const_round_shift(s0), bd); - output[1] = WRAPLOW(dct_const_round_shift(s1), bd); + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); output[2] = WRAPLOW(dct_const_round_shift(s2), bd); - output[3] = WRAPLOW(dct_const_round_shift(s3), bd); + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); } void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, @@ -1779,14 +1769,14 @@ void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; - tran_high_t x0 = input[7]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[5]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[3]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[1]; - tran_high_t x7 = input[6]; + tran_low_t x0 = input[7]; + tran_low_t x1 = input[0]; + tran_low_t x2 = input[5]; + tran_low_t x3 = input[2]; + tran_low_t x4 = input[3]; + tran_low_t x5 = input[4]; + tran_low_t x6 = input[1]; + tran_low_t x7 = input[6]; (void) bd; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { @@ -2113,22 +2103,22 @@ static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; - tran_high_t x0 = input[15]; - tran_high_t x1 = input[0]; - tran_high_t x2 = input[13]; - tran_high_t x3 = input[2]; - tran_high_t x4 = input[11]; - tran_high_t x5 = input[4]; - tran_high_t x6 = input[9]; - tran_high_t x7 = input[6]; - tran_high_t x8 = input[7]; - tran_high_t x9 = input[8]; - tran_high_t x10 = input[5]; - tran_high_t x11 = input[10]; - tran_high_t x12 = input[3]; - tran_high_t x13 = input[12]; - tran_high_t x14 = input[1]; - tran_high_t x15 = input[14]; + tran_low_t x0 = input[15]; + tran_low_t x1 = input[0]; + tran_low_t x2 = input[13]; + tran_low_t x3 = input[2]; + tran_low_t x4 = input[11]; + tran_low_t x5 = input[4]; + tran_low_t x6 = input[9]; + tran_low_t x7 = input[6]; + tran_low_t x8 = input[7]; + tran_low_t x9 = input[8]; + tran_low_t x10 = input[5]; + tran_low_t x11 = input[10]; + tran_low_t x12 = input[3]; + tran_low_t x13 = input[12]; + tran_low_t x14 = input[1]; + tran_low_t x15 = input[14]; (void) bd; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 -- 2.40.0