}
}
+#if CONFIG_INTHT16X16
+void iadst16_1d(int16_t *input, int16_t *output) {
+ int x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+ int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
+
+ x0 = input[15];
+ x1 = input[0];
+ x2 = input[13];
+ x3 = input[2];
+ x4 = input[11];
+ x5 = input[4];
+ x6 = input[9];
+ x7 = input[6];
+ x8 = input[7];
+ x9 = input[8];
+ x10 = input[5];
+ x11 = input[10];
+ x12 = input[3];
+ x13 = input[12];
+ x14 = input[1];
+ x15 = input[14];
+
+ if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8
+ | x9 | x10 | x11 | x12 | x13 | x14 | x15)) {
+ output[0] = output[1] = output[2] = output[3] = output[4]
+ = output[5] = output[6] = output[7] = output[8]
+ = output[9] = output[10] = output[11] = output[12]
+ = output[13] = output[14] = output[15] = 0;
+ return;
+ }
+
+ // stage 1
+ s0 = x0 * cospi_1_64 + x1 * cospi_31_64;
+ s1 = x0 * cospi_31_64 - x1 * cospi_1_64;
+ s2 = x2 * cospi_5_64 + x3 * cospi_27_64;
+ s3 = x2 * cospi_27_64 - x3 * cospi_5_64;
+ s4 = x4 * cospi_9_64 + x5 * cospi_23_64;
+ s5 = x4 * cospi_23_64 - x5 * cospi_9_64;
+ s6 = x6 * cospi_13_64 + x7 * cospi_19_64;
+ s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
+ s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
+ s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
+ s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
+ s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
+ s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
+ s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
+ s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
+ s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
+
+ x0 = dct_const_round_shift(s0 + s8);
+ x1 = dct_const_round_shift(s1 + s9);
+ x2 = dct_const_round_shift(s2 + s10);
+ x3 = dct_const_round_shift(s3 + s11);
+ x4 = dct_const_round_shift(s4 + s12);
+ x5 = dct_const_round_shift(s5 + s13);
+ x6 = dct_const_round_shift(s6 + s14);
+ x7 = dct_const_round_shift(s7 + s15);
+ x8 = dct_const_round_shift(s0 - s8);
+ x9 = dct_const_round_shift(s1 - s9);
+ x10 = dct_const_round_shift(s2 - s10);
+ x11 = dct_const_round_shift(s3 - s11);
+ x12 = dct_const_round_shift(s4 - s12);
+ x13 = dct_const_round_shift(s5 - s13);
+ x14 = dct_const_round_shift(s6 - s14);
+ x15 = dct_const_round_shift(s7 - s15);
+
+ // stage 2
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4;
+ s5 = x5;
+ s6 = x6;
+ s7 = x7;
+ s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
+ s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
+ s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
+ s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
+ s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
+ s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
+ s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
+ s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
+
+ x0 = s0 + s4;
+ x1 = s1 + s5;
+ x2 = s2 + s6;
+ x3 = s3 + s7;
+ x4 = s0 - s4;
+ x5 = s1 - s5;
+ x6 = s2 - s6;
+ x7 = s3 - s7;
+ x8 = dct_const_round_shift(s8 + s12);
+ x9 = dct_const_round_shift(s9 + s13);
+ x10 = dct_const_round_shift(s10 + s14);
+ x11 = dct_const_round_shift(s11 + s15);
+ x12 = dct_const_round_shift(s8 - s12);
+ x13 = dct_const_round_shift(s9 - s13);
+ x14 = dct_const_round_shift(s10 - s14);
+ x15 = dct_const_round_shift(s11 - s15);
+
+ // stage 3
+ s0 = x0;
+ s1 = x1;
+ s2 = x2;
+ s3 = x3;
+ s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
+ s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
+ s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
+ s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
+ s8 = x8;
+ s9 = x9;
+ s10 = x10;
+ s11 = x11;
+ s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
+ s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
+ s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
+ s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
+
+ x0 = s0 + s2;
+ x1 = s1 + s3;
+ x2 = s0 - s2;
+ x3 = s1 - s3;
+ x4 = dct_const_round_shift(s4 + s6);
+ x5 = dct_const_round_shift(s5 + s7);
+ x6 = dct_const_round_shift(s4 - s6);
+ x7 = dct_const_round_shift(s5 - s7);
+ x8 = s8 + s10;
+ x9 = s9 + s11;
+ x10 = s8 - s10;
+ x11 = s9 - s11;
+ x12 = dct_const_round_shift(s12 + s14);
+ x13 = dct_const_round_shift(s13 + s15);
+ x14 = dct_const_round_shift(s12 - s14);
+ x15 = dct_const_round_shift(s13 - s15);
+
+ // stage 4
+ s2 = (- cospi_16_64) * (x2 + x3);
+ s3 = cospi_16_64 * (x2 - x3);
+ s6 = cospi_16_64 * (x6 + x7);
+ s7 = cospi_16_64 * (- x6 + x7);
+ s10 = cospi_16_64 * (x10 + x11);
+ s11 = cospi_16_64 * (- x10 + x11);
+ s14 = (- cospi_16_64) * (x14 + x15);
+ s15 = cospi_16_64 * (x14 - x15);
+
+ x2 = dct_const_round_shift(s2);
+ x3 = dct_const_round_shift(s3);
+ x6 = dct_const_round_shift(s6);
+ x7 = dct_const_round_shift(s7);
+ x10 = dct_const_round_shift(s10);
+ x11 = dct_const_round_shift(s11);
+ x14 = dct_const_round_shift(s14);
+ x15 = dct_const_round_shift(s15);
+
+ output[0] = x0;
+ output[1] = - x8;
+ output[2] = x12;
+ output[3] = - x4;
+ output[4] = x6;
+ output[5] = x14;
+ output[6] = x10;
+ output[7] = x2;
+ output[8] = x3;
+ output[9] = x11;
+ output[10] = x15;
+ output[11] = x7;
+ output[12] = x5;
+ output[13] = - x13;
+ output[14] = x9;
+ output[15] = - x1;
+}
+
+void vp9_short_iht16x16_c(int16_t *input, int16_t *output,
+ int pitch, TX_TYPE tx_type) {
+ int16_t out[16 * 16];
+ int16_t *outptr = &out[0];
+ const int short_pitch = pitch >> 1;
+ int i, j;
+ int16_t temp_in[16], temp_out[16];
+
+ void (*invr)(int16_t*, int16_t*);
+ void (*invc)(int16_t*, int16_t*);
+
+ switch (tx_type) {
+ case ADST_ADST:
+ invc = &iadst16_1d;
+ invr = &iadst16_1d;
+ break;
+ case ADST_DCT:
+ invc = &iadst16_1d;
+ invr = &idct16_1d;
+ break;
+ case DCT_ADST:
+ invc = &idct16_1d;
+ invr = &iadst16_1d;
+ break;
+ case DCT_DCT:
+ invc = &idct16_1d;
+ invr = &idct16_1d;
+ break;
+ default:
+ assert(0);
+ }
+
+ // inverse transform row vectors
+ for (i = 0; i < 16; ++i) {
+ invr(input, outptr);
+ input += short_pitch;
+ outptr += 16;
+ }
+
+ // inverse transform column vectors
+ for (i = 0; i < 16; ++i) {
+ for (j = 0; j < 16; ++j)
+ temp_in[j] = out[j * 16 + i];
+ invc(temp_in, temp_out);
+ for (j = 0; j < 16; ++j)
+ output[j * 16 + i] = (temp_out[j] + 32) >> 6;
+ }
+}
+#endif
+
+
+
void vp9_short_idct10_16x16_c(int16_t *input, int16_t *output, int pitch) {
int16_t out[16 * 16];
int16_t *outptr = &out[0];