From: Debargha Mukherjee Date: Tue, 29 Sep 2015 11:03:39 +0000 (-0700) Subject: Speed up of DST and the search in ext_tx X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3e8cceb3fc85fbd09135024ae1398647a7cb7d3a;p=libvpx Speed up of DST and the search in ext_tx Adds an early termination to the ext_tx search, and also implements the DST transforms more efficiently. About 4 times faster with the ext-tx experiment. There is a 0.09% drop in performance on derflr from 1.735% to 1.648%, but worth it with the speedup achieved. Change-Id: I2ede9d69c557f25e0a76cd5d701cc0e36e825c7c --- diff --git a/vp10/common/blockd.h b/vp10/common/blockd.h index af39d96be..8a3e6cceb 100644 --- a/vp10/common/blockd.h +++ b/vp10/common/blockd.h @@ -242,13 +242,13 @@ static TX_TYPE ext_tx_to_txtype[EXT_TX_TYPES] = { FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, - DST_DST, DST_DCT, DCT_DST, DST_ADST, ADST_DST, DST_FLIPADST, FLIPADST_DST, + DST_DST, }; #endif // CONFIG_EXT_TX diff --git a/vp10/common/entropymode.c b/vp10/common/entropymode.c index 110af7826..1fb28af62 100644 --- a/vp10/common/entropymode.c +++ b/vp10/common/entropymode.c @@ -317,17 +317,17 @@ static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] #if CONFIG_EXT_TX const vpx_tree_index vp10_ext_tx_tree[TREE_SIZE(EXT_TX_TYPES)] = { -NORM, 2, - -ALT9, 4, + -ALT15, 4, 6, 16, 8, 10, - -ALT10, -ALT11, + -ALT9, -ALT10, 12, 14, -ALT1, -ALT2, -ALT4, -ALT5, 18, 24, 20, 22, - -ALT12, -ALT13, - -ALT14, -ALT15, + -ALT11, -ALT12, + -ALT13, -ALT14, 26, 28, -ALT3, -ALT6, -ALT7, -ALT8 diff --git a/vp10/common/enums.h b/vp10/common/enums.h index a4a544089..d63ed4e22 100644 --- a/vp10/common/enums.h +++ b/vp10/common/enums.h @@ -100,13 +100,13 @@ typedef enum { FLIPADST_FLIPADST = 6, ADST_FLIPADST = 7, FLIPADST_ADST = 8, - DST_DST = 9, - DST_DCT = 10, - DCT_DST = 11, - DST_ADST = 12, - ADST_DST = 13, - DST_FLIPADST = 14, - FLIPADST_DST = 15, + DST_DCT = 9, + DCT_DST = 10, + DST_ADST = 11, + ADST_DST = 12, + DST_FLIPADST = 13, + FLIPADST_DST = 14, + DST_DST = 15, #endif // CONFIG_EXT_TX TX_TYPES, } TX_TYPE; diff --git a/vp10/common/idct.c b/vp10/common/idct.c index 62505eab1..5607c6f6f 100644 --- a/vp10/common/idct.c +++ b/vp10/common/idct.c @@ -19,159 +19,332 @@ #if CONFIG_EXT_TX void idst4_c(const tran_low_t *input, tran_low_t *output) { - static const int N = 4; // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) static const int32_t sinvalue_lookup[] = { 141124871, 228344838, }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, 8); - } + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); } void idst8_c(const tran_low_t *input, tran_low_t *output) { - static const int N = 8; // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 static const int32_t sinvalue_lookup[] = { 86559612, 162678858, 219176632, 249238470 }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, 8); - } + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + input[4]); + int64_t d34 = (input[3] - input[4]); + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); } void idst16_c(const tran_low_t *input, tran_low_t *output) { - static const int N = 16; // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) static const int32_t sinvalue_lookup[] = { 47852167, 94074787, 137093803, 175444254, 207820161, 233119001, 250479254, 259309736 }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, 8); - } + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8); } #if CONFIG_VP9_HIGHBITDEPTH void highbd_idst4_c(const tran_low_t *input, tran_low_t *output, int bd) { - static const int N = 4; // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) static const int32_t sinvalue_lookup[] = { 141124871, 228344838, }; - int i, j; - (void) bd; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, bd); - } + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); } void highbd_idst8_c(const tran_low_t *input, tran_low_t *output, int bd) { - static const int N = 8; // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 static const int32_t sinvalue_lookup[] = { 86559612, 162678858, 219176632, 249238470 }; - int i, j; - (void) bd; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, bd); - } + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + input[4]); + int64_t d34 = (input[3] - input[4]); + + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); } void highbd_idst16_c(const tran_low_t *input, tran_low_t *output, int bd) { - static const int N = 16; // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) static const int32_t sinvalue_lookup[] = { 47852167, 94074787, 137093803, 175444254, 207820161, 233119001, 250479254, 259309736 }; - int i, j; - (void) bd; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - sum += (int64_t)input[j] * sinvalue_lookup[idx - 1] * (sign ? -1 : 1); - } - sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - output[i] = WRAPLOW(sum, bd); - } + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7]; + output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EXT_TX diff --git a/vp10/encoder/dct.c b/vp10/encoder/dct.c index f1fb19537..ba0b50ab2 100644 --- a/vp10/encoder/dct.c +++ b/vp10/encoder/dct.c @@ -36,79 +36,166 @@ static INLINE void range_check(const tran_low_t *input, const int size, #if CONFIG_EXT_TX void fdst4(const tran_low_t *input, tran_low_t *output) { - static const int N = 4; // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2) static const int32_t sinvalue_lookup[] = { 141124871, 228344838, }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - idx--; - sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1); - } - output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - } + int64_t sum; + int64_t s03 = (input[0] + input[3]); + int64_t d03 = (input[0] - input[3]); + int64_t s12 = (input[1] + input[2]); + int64_t d12 = (input[1] - input[2]); + sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); } void fdst8(const tran_low_t *input, tran_low_t *output) { - static const int N = 8; // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2 static const int sinvalue_lookup[] = { 86559612, 162678858, 219176632, 249238470 }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - idx--; - sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1); - } - output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - } + int64_t sum; + int64_t s07 = (input[0] + input[7]); + int64_t d07 = (input[0] - input[7]); + int64_t s16 = (input[1] + input[6]); + int64_t d16 = (input[1] - input[6]); + int64_t s25 = (input[2] + input[5]); + int64_t d25 = (input[2] - input[5]); + int64_t s34 = (input[3] + input[4]); + int64_t d34 = (input[3] - input[4]); + sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] + + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] + + d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = (s07 + s16 - s34)* sinvalue_lookup[2]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] - + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] - + s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1]; + output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = (d07 - d16 + d34)* sinvalue_lookup[2]; + output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] + + s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0]; + output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] + + d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3]; + output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); } void fdst16(const tran_low_t *input, tran_low_t *output) { - static const int N = 16; // {sin(pi/17), sin(pi*2/17, ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2) static const int sinvalue_lookup[] = { 47852167, 94074787, 137093803, 175444254, 207820161, 233119001, 250479254, 259309736 }; - int i, j; - for (i = 0; i < N; i++) { - int64_t sum = 0; - for (j = 0; j < N; j++) { - int idx = (i + 1) * (j + 1); - int sign = 0; - if (idx > N + 1) { - sign = (idx / (N + 1)) & 1; - idx %= (N + 1); - } - idx = idx > N + 1 - idx ? N + 1 - idx : idx; - if (idx == 0) continue; - idx--; - sum += (int64_t)input[j] * sinvalue_lookup[idx] * (sign ? -1 : 1); - } - output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); - } + int64_t sum; + int64_t s015 = (input[0] + input[15]); + int64_t d015 = (input[0] - input[15]); + int64_t s114 = (input[1] + input[14]); + int64_t d114 = (input[1] - input[14]); + int64_t s213 = (input[2] + input[13]); + int64_t d213 = (input[2] - input[13]); + int64_t s312 = (input[3] + input[12]); + int64_t d312 = (input[3] - input[12]); + int64_t s411 = (input[4] + input[11]); + int64_t d411 = (input[4] - input[11]); + int64_t s510 = (input[5] + input[10]); + int64_t d510 = (input[5] - input[10]); + int64_t s69 = (input[6] + input[9]); + int64_t d69 = (input[6] - input[9]); + int64_t s78 = (input[7] + input[8]); + int64_t d78 = (input[7] - input[8]); + sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] + + s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] + + s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] + + s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7]; + output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] + + d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] + + d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] + + d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0]; + output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] + + s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] + + s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] - + s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6]; + output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] + + d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] - + d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] - + d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1]; + output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] + + s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] - + s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] + + s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5]; + output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] - + d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] - + d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] + + d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2]; + output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] - + s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] + + s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] + + s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4]; + output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] - + d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] + + d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] - + d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3]; + output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] - + s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] + + s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] - + s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3]; + output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] - + d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] + + d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] + + d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4]; + output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] - + s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] - + s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] + + s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2]; + output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] + + d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] - + d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] + + d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5]; + output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] + + s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] - + s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] - + s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1]; + output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] + + d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] + + d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] - + d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6]; + output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] + + s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] + + s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] + + s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0]; + output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); + sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] + + d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] + + d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] + + d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7]; + output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)); } #endif // CONFIG_EXT_TX diff --git a/vp10/encoder/rdopt.c b/vp10/encoder/rdopt.c index 84f796e4c..9c4baa059 100644 --- a/vp10/encoder/rdopt.c +++ b/vp10/encoder/rdopt.c @@ -640,7 +640,6 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, int start_tx_type, end_tx_type; #endif // CONFIG_EXT_TX - const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs); assert(skip_prob > 0); s0 = vp10_cost_bit(skip_prob, 0); @@ -675,8 +674,14 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, int r_tx_size = 0; #if CONFIG_EXT_TX - if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(n)) - continue; + if (is_inter_block(mbmi)) { + if (mbmi->ext_txfrm >= GET_EXT_TX_TYPES(n)) { + continue; + } else if (mbmi->ext_txfrm >= ALT11 && best_tx_type == NORM) { + // Terminate if the best so far is still NORM + break; + } + } #endif // CONFIG_EXT_TX for (m = 0; m <= n - (n == (int) max_tx_size); ++m) { @@ -725,8 +730,8 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, last_rd = rd; #if CONFIG_EXT_TX - if (rd < (is_inter_block(mbmi) && - (best_tx_type == NORM) ? ext_tx_th : 1) * best_rd) { + if (rd < (is_inter_block(mbmi) && best_tx_type == NORM ? ext_tx_th : 1) * + best_rd) { #else if (rd < best_rd) { #endif // CONFIG_EXT_TX @@ -747,7 +752,7 @@ static void choose_tx_size_from_rd(VP10_COMP *cpi, MACROBLOCK *x, mbmi->tx_size = best_tx; #if CONFIG_EXT_TX - mbmi->ext_txfrm = best_tx_type; + mbmi->ext_txfrm = best_tx_type > -1 ? best_tx_type : NORM; txfm_rd_in_plane(x, &r, &d, &s, &sse, ref_best_rd, 0, bs, best_tx, cpi->sf.use_fast_coef_costing);