#if CONFIG_EXT_TX
#if CONFIG_DST1
-static INLINE void idst_core(const tran_low_t *input, tran_low_t *output,
- const int N, const int32_t *dst_lookup,
- int bd) {
- int i, j;
- (void) bd;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = MIN(idx, N + 1 - idx);
- if (idx == 0) continue;
- sum += (int64_t)input[j] * dst_lookup[idx - 1] * (sign ? -1 : 1);
- }
- sum = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- output[i] = WRAPLOW(sum, bd);
- }
-}
-
+// 4-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 5) * scale
+// Reads input[0..3], writes output[0..3]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW with a bit depth of 8.
void idst4(const tran_low_t *input, tran_low_t *output) {
- idst_core(input, output, 4, dst_lookup4, 8);
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2), stored times 2^28
+  // (presumably 2^(2 * DCT_CONST_BITS) -- confirm against its definition).
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
+  };
+  int64_t sum;
+  // Mirrored inputs share a coefficient magnitude, so fold them into pair
+  // sums (even outputs) and pair differences (odd outputs). Widen to 64 bits
+  // before adding so the fold cannot overflow at tran_low_t's width.
+  int64_t s03 = (int64_t)input[0] + input[3];
+  int64_t d03 = (int64_t)input[0] - input[3];
+  int64_t s12 = (int64_t)input[1] + input[2];
+  int64_t d12 = (int64_t)input[1] - input[2];
+  sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
+// 8-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 9) * scale
+// Reads input[0..7], writes output[0..7]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW with a bit depth of 8.
void idst8(const tran_low_t *input, tran_low_t *output) {
- idst_core(input, output, 8, dst_lookup8, 8);
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2, stored
+  // times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  static const int32_t sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s07 = (int64_t)input[0] + input[7];
+  int64_t d07 = (int64_t)input[0] - input[7];
+  int64_t s16 = (int64_t)input[1] + input[6];
+  int64_t d16 = (int64_t)input[1] - input[6];
+  int64_t s25 = (int64_t)input[2] + input[5];
+  int64_t d25 = (int64_t)input[2] - input[5];
+  int64_t s34 = (int64_t)input[3] + input[4];
+  int64_t d34 = (int64_t)input[3] - input[4];
+  sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+        d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  // Only sinvalue_lookup[2] occurs in this row and the 2/5 pair drops out.
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+  output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  // Same single-coefficient shape as output[2], on the pair differences.
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+  output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+        s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+  output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+  output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
+// 16-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 17) * scale
+// Reads input[0..15], writes output[0..15]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW with a bit depth of 8.
void idst16(const tran_low_t *input, tran_low_t *output) {
- idst_core(input, output, 16, dst_lookup16, 8);
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2),
+  // stored times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  static const int32_t sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s015 = (int64_t)input[0] + input[15];
+  int64_t d015 = (int64_t)input[0] - input[15];
+  int64_t s114 = (int64_t)input[1] + input[14];
+  int64_t d114 = (int64_t)input[1] - input[14];
+  int64_t s213 = (int64_t)input[2] + input[13];
+  int64_t d213 = (int64_t)input[2] - input[13];
+  int64_t s312 = (int64_t)input[3] + input[12];
+  int64_t d312 = (int64_t)input[3] - input[12];
+  int64_t s411 = (int64_t)input[4] + input[11];
+  int64_t d411 = (int64_t)input[4] - input[11];
+  int64_t s510 = (int64_t)input[5] + input[10];
+  int64_t d510 = (int64_t)input[5] - input[10];
+  int64_t s69 = (int64_t)input[6] + input[9];
+  int64_t d69 = (int64_t)input[6] - input[9];
+  int64_t s78 = (int64_t)input[7] + input[8];
+  int64_t d78 = (int64_t)input[7] - input[8];
+  sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+        s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+        s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+        s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+        d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+        d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+        d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+        s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+        s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+        s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+        d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+        d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+        d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+        s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+        s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+        s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+  output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+        d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+        d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+        d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+  output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+        s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+        s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+        s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+  output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+        d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+        d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+        d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+  output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+        s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+        s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+        s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+  output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+        d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+        d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+        d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+  output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+        s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+        s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+        s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+  output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+        d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+        d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+        d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+  output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+        s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+        s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+        s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+  output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+        d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+        d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+        d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+  output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+        s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+        s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+        s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+  output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
+  sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+        d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+        d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+        d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+  output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), 8);
}
#if CONFIG_VP9_HIGHBITDEPTH
+// 4-point sine transform, bit depth supplied by the caller:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 5) * scale
+// Reads input[0..3], writes output[0..3]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW together with bd.
void highbd_idst4(const tran_low_t *input, tran_low_t *output, int bd) {
- idst_core(input, output, 4, dst_lookup4, bd);
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2), stored times 2^28
+  // (presumably 2^(2 * DCT_CONST_BITS) -- confirm against its definition).
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (even outputs) and pair differences
+  // (odd outputs). Widen to 64 bits before adding so the fold cannot
+  // overflow at tran_low_t's width.
+  int64_t s03 = (int64_t)input[0] + input[3];
+  int64_t d03 = (int64_t)input[0] - input[3];
+  int64_t s12 = (int64_t)input[1] + input[2];
+  int64_t d12 = (int64_t)input[1] - input[2];
+
+  sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
+// 8-point sine transform, bit depth supplied by the caller:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 9) * scale
+// Reads input[0..7], writes output[0..7]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW together with bd.
void highbd_idst8(const tran_low_t *input, tran_low_t *output, int bd) {
- idst_core(input, output, 8, dst_lookup8, bd);
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2, stored
+  // times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  static const int32_t sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s07 = (int64_t)input[0] + input[7];
+  int64_t d07 = (int64_t)input[0] - input[7];
+  int64_t s16 = (int64_t)input[1] + input[6];
+  int64_t d16 = (int64_t)input[1] - input[6];
+  int64_t s25 = (int64_t)input[2] + input[5];
+  int64_t d25 = (int64_t)input[2] - input[5];
+  int64_t s34 = (int64_t)input[3] + input[4];
+  int64_t d34 = (int64_t)input[3] - input[4];
+
+  sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+        d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  // Only sinvalue_lookup[2] occurs in this row and the 2/5 pair drops out.
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+  output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  // Same single-coefficient shape as output[2], on the pair differences.
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+  output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+        s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+  output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+  output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
+// 16-point sine transform, bit depth supplied by the caller:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 17) * scale
+// Reads input[0..15], writes output[0..15]. Each result is rounded with
+// ROUND_POWER_OF_TWO and then passed to WRAPLOW together with bd.
void highbd_idst16(const tran_low_t *input, tran_low_t *output, int bd) {
- idst_core(input, output, 16, dst_lookup16, bd);
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2),
+  // stored times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  static const int32_t sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s015 = (int64_t)input[0] + input[15];
+  int64_t d015 = (int64_t)input[0] - input[15];
+  int64_t s114 = (int64_t)input[1] + input[14];
+  int64_t d114 = (int64_t)input[1] - input[14];
+  int64_t s213 = (int64_t)input[2] + input[13];
+  int64_t d213 = (int64_t)input[2] - input[13];
+  int64_t s312 = (int64_t)input[3] + input[12];
+  int64_t d312 = (int64_t)input[3] - input[12];
+  int64_t s411 = (int64_t)input[4] + input[11];
+  int64_t d411 = (int64_t)input[4] - input[11];
+  int64_t s510 = (int64_t)input[5] + input[10];
+  int64_t d510 = (int64_t)input[5] - input[10];
+  int64_t s69 = (int64_t)input[6] + input[9];
+  int64_t d69 = (int64_t)input[6] - input[9];
+  int64_t s78 = (int64_t)input[7] + input[8];
+  int64_t d78 = (int64_t)input[7] - input[8];
+  sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+        s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+        s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+        s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+  output[0] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+        d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+        d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+        d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+  output[1] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+        s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+        s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+        s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+  output[2] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+        d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+        d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+        d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+  output[3] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+        s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+        s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+        s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+  output[4] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+        d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+        d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+        d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+  output[5] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+        s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+        s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+        s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+  output[6] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+        d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+        d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+        d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+  output[7] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+        s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+        s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+        s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+  output[8] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+        d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+        d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+        d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+  output[9] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+        s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+        s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+        s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+  output[10] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+        d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+        d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+        d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+  output[11] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+        s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+        s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+        s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+  output[12] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+        d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+        d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+        d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+  output[13] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+        s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+        s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+        s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+  output[14] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
+  sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+        d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+        d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+        d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+  output[15] = WRAPLOW(ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS)), bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // CONFIG_DST1
#if CONFIG_EXT_TX
#if CONFIG_DST1
-static INLINE void vp9_fdst_core(const tran_low_t *input, tran_low_t *output,
- const int N, const int32_t* dst_lookup) {
- int i, j;
- for (i = 0; i < N; i++) {
- int64_t sum = 0;
- for (j = 0; j < N; j++) {
- int idx = (i + 1) * (j + 1);
- int sign = 0;
- if (idx > N + 1) {
- sign = (idx / (N + 1)) & 1;
- idx %= (N + 1);
- }
- idx = MIN(idx, N + 1 - idx);
- if (idx == 0) continue;
- idx--;
- sum += (int64_t)input[j] * dst_lookup[idx] * (sign ? -1 : 1);
- }
- output[i] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
- }
-}
-
+// 4-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 5) * scale
+// Reads input[0..3], writes output[0..3]. Each result is rounded with
+// ROUND_POWER_OF_TWO and stored directly.
void vp9_fdst4(const tran_low_t *input, tran_low_t *output) {
- vp9_fdst_core(input, output, 4, dst_lookup4);
+  // {sin(pi/5), sin(pi*2/5)} * sqrt(2/5) * sqrt(2), stored times 2^28
+  // (presumably 2^(2 * DCT_CONST_BITS) -- confirm against its definition).
+  static const int32_t sinvalue_lookup[] = {
+    141124871, 228344838,
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (even outputs) and pair differences
+  // (odd outputs). Widen to 64 bits before adding so the fold cannot
+  // overflow at tran_low_t's width.
+  int64_t s03 = (int64_t)input[0] + input[3];
+  int64_t d03 = (int64_t)input[0] - input[3];
+  int64_t s12 = (int64_t)input[1] + input[2];
+  int64_t d12 = (int64_t)input[1] - input[2];
+  sum = s03 * sinvalue_lookup[0] + s12 * sinvalue_lookup[1];
+  output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d03 * sinvalue_lookup[1] + d12 * sinvalue_lookup[0];
+  output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s03 * sinvalue_lookup[1] - s12 * sinvalue_lookup[0];
+  output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d03 * sinvalue_lookup[0] - d12 * sinvalue_lookup[1];
+  output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
+// 8-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 9) * scale
+// Reads input[0..7], writes output[0..7]. Each result is rounded with
+// ROUND_POWER_OF_TWO and stored directly.
void vp9_fdst8(const tran_low_t *input, tran_low_t *output) {
- vp9_fdst_core(input, output, 8, dst_lookup8);
+  // {sin(pi/9), sin(pi*2/9), ..., sin(pi*4/9)} * sqrt(2/9) * 2, stored
+  // times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  // Declared int32_t, matching the tables in the sibling transforms.
+  static const int32_t sinvalue_lookup[] = {
+    86559612, 162678858, 219176632, 249238470
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s07 = (int64_t)input[0] + input[7];
+  int64_t d07 = (int64_t)input[0] - input[7];
+  int64_t s16 = (int64_t)input[1] + input[6];
+  int64_t d16 = (int64_t)input[1] - input[6];
+  int64_t s25 = (int64_t)input[2] + input[5];
+  int64_t d25 = (int64_t)input[2] - input[5];
+  int64_t s34 = (int64_t)input[3] + input[4];
+  int64_t d34 = (int64_t)input[3] - input[4];
+  sum = s07 * sinvalue_lookup[0] + s16 * sinvalue_lookup[1] +
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[3];
+  output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d07 * sinvalue_lookup[1] + d16 * sinvalue_lookup[3] +
+        d25 * sinvalue_lookup[2] + d34 * sinvalue_lookup[0];
+  output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  // Only sinvalue_lookup[2] occurs in this row and the 2/5 pair drops out.
+  sum = (s07 + s16 - s34) * sinvalue_lookup[2];
+  output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d07 * sinvalue_lookup[3] + d16 * sinvalue_lookup[0] -
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[1];
+  output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s07 * sinvalue_lookup[3] - s16 * sinvalue_lookup[0] -
+        s25 * sinvalue_lookup[2] + s34 * sinvalue_lookup[1];
+  output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  // Same single-coefficient shape as output[2], on the pair differences.
+  sum = (d07 - d16 + d34) * sinvalue_lookup[2];
+  output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s07 * sinvalue_lookup[1] - s16 * sinvalue_lookup[3] +
+        s25 * sinvalue_lookup[2] - s34 * sinvalue_lookup[0];
+  output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d07 * sinvalue_lookup[0] - d16 * sinvalue_lookup[1] +
+        d25 * sinvalue_lookup[2] - d34 * sinvalue_lookup[3];
+  output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
+// 16-point sine transform:
+//   output[i] = sum_j input[j] * sin((i + 1) * (j + 1) * pi / 17) * scale
+// Reads input[0..15], writes output[0..15]. Each result is rounded with
+// ROUND_POWER_OF_TWO and stored directly.
void vp9_fdst16(const tran_low_t *input, tran_low_t *output) {
- vp9_fdst_core(input, output, 16, dst_lookup16);
+  // {sin(pi/17), sin(pi*2/17), ..., sin(pi*8/17)} * sqrt(2/17) * 2 * sqrt(2),
+  // stored times 2^28 (presumably 2^(2 * DCT_CONST_BITS) -- confirm).
+  // Declared int32_t, matching the tables in the sibling transforms.
+  static const int32_t sinvalue_lookup[] = {
+    47852167, 94074787, 137093803, 175444254,
+    207820161, 233119001, 250479254, 259309736
+  };
+  int64_t sum;
+  // Fold mirrored inputs into pair sums (used by even outputs) and pair
+  // differences (used by odd outputs). Widen to 64 bits before adding so the
+  // fold cannot overflow at tran_low_t's width.
+  int64_t s015 = (int64_t)input[0] + input[15];
+  int64_t d015 = (int64_t)input[0] - input[15];
+  int64_t s114 = (int64_t)input[1] + input[14];
+  int64_t d114 = (int64_t)input[1] - input[14];
+  int64_t s213 = (int64_t)input[2] + input[13];
+  int64_t d213 = (int64_t)input[2] - input[13];
+  int64_t s312 = (int64_t)input[3] + input[12];
+  int64_t d312 = (int64_t)input[3] - input[12];
+  int64_t s411 = (int64_t)input[4] + input[11];
+  int64_t d411 = (int64_t)input[4] - input[11];
+  int64_t s510 = (int64_t)input[5] + input[10];
+  int64_t d510 = (int64_t)input[5] - input[10];
+  int64_t s69 = (int64_t)input[6] + input[9];
+  int64_t d69 = (int64_t)input[6] - input[9];
+  int64_t s78 = (int64_t)input[7] + input[8];
+  int64_t d78 = (int64_t)input[7] - input[8];
+  sum = s015 * sinvalue_lookup[0] + s114 * sinvalue_lookup[1] +
+        s213 * sinvalue_lookup[2] + s312 * sinvalue_lookup[3] +
+        s411 * sinvalue_lookup[4] + s510 * sinvalue_lookup[5] +
+        s69 * sinvalue_lookup[6] + s78 * sinvalue_lookup[7];
+  output[0] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[1] + d114 * sinvalue_lookup[3] +
+        d213 * sinvalue_lookup[5] + d312 * sinvalue_lookup[7] +
+        d411 * sinvalue_lookup[6] + d510 * sinvalue_lookup[4] +
+        d69 * sinvalue_lookup[2] + d78 * sinvalue_lookup[0];
+  output[1] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[2] + s114 * sinvalue_lookup[5] +
+        s213 * sinvalue_lookup[7] + s312 * sinvalue_lookup[4] +
+        s411 * sinvalue_lookup[1] - s510 * sinvalue_lookup[0] -
+        s69 * sinvalue_lookup[3] - s78 * sinvalue_lookup[6];
+  output[2] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[3] + d114 * sinvalue_lookup[7] +
+        d213 * sinvalue_lookup[4] + d312 * sinvalue_lookup[0] -
+        d411 * sinvalue_lookup[2] - d510 * sinvalue_lookup[6] -
+        d69 * sinvalue_lookup[5] - d78 * sinvalue_lookup[1];
+  output[3] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[4] + s114 * sinvalue_lookup[6] +
+        s213 * sinvalue_lookup[1] - s312 * sinvalue_lookup[2] -
+        s411 * sinvalue_lookup[7] - s510 * sinvalue_lookup[3] +
+        s69 * sinvalue_lookup[0] + s78 * sinvalue_lookup[5];
+  output[4] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[5] + d114 * sinvalue_lookup[4] -
+        d213 * sinvalue_lookup[0] - d312 * sinvalue_lookup[6] -
+        d411 * sinvalue_lookup[3] + d510 * sinvalue_lookup[1] +
+        d69 * sinvalue_lookup[7] + d78 * sinvalue_lookup[2];
+  output[5] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[6] + s114 * sinvalue_lookup[2] -
+        s213 * sinvalue_lookup[3] - s312 * sinvalue_lookup[5] +
+        s411 * sinvalue_lookup[0] + s510 * sinvalue_lookup[7] +
+        s69 * sinvalue_lookup[1] - s78 * sinvalue_lookup[4];
+  output[6] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[7] + d114 * sinvalue_lookup[0] -
+        d213 * sinvalue_lookup[6] - d312 * sinvalue_lookup[1] +
+        d411 * sinvalue_lookup[5] + d510 * sinvalue_lookup[2] -
+        d69 * sinvalue_lookup[4] - d78 * sinvalue_lookup[3];
+  output[7] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[7] - s114 * sinvalue_lookup[0] -
+        s213 * sinvalue_lookup[6] + s312 * sinvalue_lookup[1] +
+        s411 * sinvalue_lookup[5] - s510 * sinvalue_lookup[2] -
+        s69 * sinvalue_lookup[4] + s78 * sinvalue_lookup[3];
+  output[8] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[6] - d114 * sinvalue_lookup[2] -
+        d213 * sinvalue_lookup[3] + d312 * sinvalue_lookup[5] +
+        d411 * sinvalue_lookup[0] - d510 * sinvalue_lookup[7] +
+        d69 * sinvalue_lookup[1] + d78 * sinvalue_lookup[4];
+  output[9] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[5] - s114 * sinvalue_lookup[4] -
+        s213 * sinvalue_lookup[0] + s312 * sinvalue_lookup[6] -
+        s411 * sinvalue_lookup[3] - s510 * sinvalue_lookup[1] +
+        s69 * sinvalue_lookup[7] - s78 * sinvalue_lookup[2];
+  output[10] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[4] - d114 * sinvalue_lookup[6] +
+        d213 * sinvalue_lookup[1] + d312 * sinvalue_lookup[2] -
+        d411 * sinvalue_lookup[7] + d510 * sinvalue_lookup[3] +
+        d69 * sinvalue_lookup[0] - d78 * sinvalue_lookup[5];
+  output[11] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[3] - s114 * sinvalue_lookup[7] +
+        s213 * sinvalue_lookup[4] - s312 * sinvalue_lookup[0] -
+        s411 * sinvalue_lookup[2] + s510 * sinvalue_lookup[6] -
+        s69 * sinvalue_lookup[5] + s78 * sinvalue_lookup[1];
+  output[12] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[2] - d114 * sinvalue_lookup[5] +
+        d213 * sinvalue_lookup[7] - d312 * sinvalue_lookup[4] +
+        d411 * sinvalue_lookup[1] + d510 * sinvalue_lookup[0] -
+        d69 * sinvalue_lookup[3] + d78 * sinvalue_lookup[6];
+  output[13] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = s015 * sinvalue_lookup[1] - s114 * sinvalue_lookup[3] +
+        s213 * sinvalue_lookup[5] - s312 * sinvalue_lookup[7] +
+        s411 * sinvalue_lookup[6] - s510 * sinvalue_lookup[4] +
+        s69 * sinvalue_lookup[2] - s78 * sinvalue_lookup[0];
+  output[14] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
+  sum = d015 * sinvalue_lookup[0] - d114 * sinvalue_lookup[1] +
+        d213 * sinvalue_lookup[2] - d312 * sinvalue_lookup[3] +
+        d411 * sinvalue_lookup[4] - d510 * sinvalue_lookup[5] +
+        d69 * sinvalue_lookup[6] - d78 * sinvalue_lookup[7];
+  output[15] = ROUND_POWER_OF_TWO(sum, (2 * DCT_CONST_BITS));
}
#endif // CONFIG_DST1
#endif // CONFIG_EXT_TX