output[15] = WRAPLOW(-step2[0] + step2[15], 8);
}
+#if CONFIG_EXT_TX
+// For use in lieu of DST.
+//
+// 32-point inverse "half-center" transform:
+//   - input[0..15] is scaled by sqrt(2) and run through a 16-point inverse
+//     DCT whose result lands in output[8..23];
+//   - input[16..23] and input[24..31] are passed through, scaled by 4, to
+//     output[0..7] and output[24..31] respectively.
+//
+// Safe to call in place (input == output): every input coefficient is read
+// before the output location that overlaps it is written.
+static void ihalfcenter32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Multiply input by sqrt(2). This has to happen before the pass-through
+  // copy below, which writes output[0..7] and would otherwise destroy
+  // input[0..7] when the two buffers alias.
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+  }
+  for (i = 0; i < 8; ++i) {
+    output[i] = input[16 + i] * 4;
+    output[24 + i] = input[24 + i] * 4;
+  }
+  // Writes output[8..23]; input[16..23] has already been consumed above.
+  idct16_c(inputhalf, output + 8);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+// 32-point inverse "half-right" transform:
+//   - input[0..15] is scaled by sqrt(2) and run through a 16-point inverse
+//     DCT whose result lands in output[16..31];
+//   - input[16..31] is passed through, scaled by 4, to output[0..15].
+//
+// Safe to call in place (input == output): every input coefficient is read
+// before the output location that overlaps it is written.
+static void ihalfright32_c(const tran_low_t *input, tran_low_t *output) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Multiply input by sqrt(2). This has to happen before the pass-through
+  // copy below, which writes output[0..15] and would otherwise destroy
+  // input[0..15] when the two buffers alias.
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)dct_const_round_shift(input[i] * Sqrt2);
+  }
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  // Writes output[16..31]; input[16..31] has already been consumed above.
+  idct16_c(inputhalf, output + 16);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth counterpart of ihalfcenter32_c:
+//   - input[0..15] is scaled by sqrt(2) and run through the 16-point
+//     high-bitdepth inverse DCT, whose result lands in output[8..23];
+//   - input[16..23] and input[24..31] are passed through, scaled by 4, to
+//     output[0..7] and output[24..31] respectively.
+// bd is forwarded to the rounding helper and to vpx_highbd_idct16_c.
+//
+// Safe to call in place (input == output): every input coefficient is read
+// before the output location that overlaps it is written.
+static void highbd_ihalfcenter32_c(const tran_low_t *input, tran_low_t *output,
+                                   int bd) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Multiply input by sqrt(2). This has to happen before the pass-through
+  // copy below, which writes output[0..7] and would otherwise destroy
+  // input[0..7] when the two buffers alias.
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+        input[i] * Sqrt2, bd);
+  }
+  for (i = 0; i < 8; ++i) {
+    output[i] = input[16 + i] * 4;
+    output[24 + i] = input[24 + i] * 4;
+  }
+  // Writes output[8..23]; input[16..23] has already been consumed above.
+  vpx_highbd_idct16_c(inputhalf, output + 8, bd);
+  // Note overall scaling factor is 4 times orthogonal
+}
+
+// High-bitdepth counterpart of ihalfright32_c:
+//   - input[0..15] is scaled by sqrt(2) and run through the 16-point
+//     high-bitdepth inverse DCT, whose result lands in output[16..31];
+//   - input[16..31] is passed through, scaled by 4, to output[0..15].
+// bd is forwarded to the rounding helper and to vpx_highbd_idct16_c.
+//
+// Safe to call in place (input == output): every input coefficient is read
+// before the output location that overlaps it is written.
+static void highbd_ihalfright32_c(const tran_low_t *input, tran_low_t *output,
+                                  int bd) {
+  int i;
+  tran_low_t inputhalf[16];
+  // Multiply input by sqrt(2). This has to happen before the pass-through
+  // copy below, which writes output[0..15] and would otherwise destroy
+  // input[0..15] when the two buffers alias.
+  for (i = 0; i < 16; ++i) {
+    inputhalf[i] = (tran_low_t)highbd_dct_const_round_shift(
+        input[i] * Sqrt2, bd);
+  }
+  for (i = 0; i < 16; ++i) {
+    output[i] = input[16 + i] * 4;
+  }
+  // Writes output[16..31]; input[16..31] has already been consumed above.
+  vpx_highbd_idct16_c(inputhalf, output + 16, bd);
+  // Note overall scaling factor is 4 times orthogonal
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
+
// Inverse identiy transform and add.
static void inv_idtx_add_c(const tran_low_t *input, uint8_t *dest, int stride,
int bs) {
}
}
+#if CONFIG_EXT_TX
+// 32x32 inverse hybrid transform, added into the destination block.
+//
+// input   : 32 rows of 32 coefficients, stored contiguously.
+// dest    : 8-bit pixel block that the residual is added to (clipped).
+// stride  : distance between consecutive dest rows, in pixels.
+// tx_type : index into IHT_32 selecting the 1-D transform pair; the table
+//           has 16 entries (DCT_DCT .. DST_DST), so callers must not pass a
+//           value outside that range.
+//
+// Steps: 1-D transform of every row, in-place transpose, 1-D transform of
+// every (now row-stored) column, optional flip handled by
+// maybe_flip_strides(), then rounding by 6 bits and accumulation into dest.
+void vp10_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
+                              int stride, int tx_type) {
+  static const transform_2d IHT_32[] = {
+    { idct32_c, idct32_c },                // DCT_DCT = 0,
+    { ihalfright32_c, idct32_c },          // ADST_DCT = 1,
+    { idct32_c, ihalfright32_c },          // DCT_ADST = 2,
+    { ihalfright32_c, ihalfright32_c },    // ADST_ADST = 3,
+    { ihalfright32_c, idct32_c },          // FLIPADST_DCT = 4,
+    { idct32_c, ihalfright32_c },          // DCT_FLIPADST = 5,
+    { ihalfright32_c, ihalfright32_c },    // FLIPADST_FLIPADST = 6,
+    { ihalfright32_c, ihalfright32_c },    // ADST_FLIPADST = 7,
+    { ihalfright32_c, ihalfright32_c },    // FLIPADST_ADST = 8,
+    { ihalfcenter32_c, idct32_c },         // DST_DCT = 9,
+    { idct32_c, ihalfcenter32_c },         // DCT_DST = 10,
+    { ihalfcenter32_c, ihalfright32_c },   // DST_ADST = 11,
+    { ihalfright32_c, ihalfcenter32_c },   // ADST_DST = 12,
+    { ihalfcenter32_c, ihalfright32_c },   // DST_FLIPADST = 13,
+    { ihalfright32_c, ihalfcenter32_c },   // FLIPADST_DST = 14,
+    { ihalfcenter32_c, ihalfcenter32_c },  // DST_DST = 15
+  };
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t col_in[32];
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // inverse transform row vectors
+  for (i = 0; i < 32; ++i) {
+    IHT_32[tx_type].rows(input, out[i]);
+    input += 32;
+  }
+
+  // transpose
+  for (i = 1; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 32; ++i) {
+    // Feed the 1-D transform from a private copy instead of passing the
+    // same buffer as both the const input and the output: a transform that
+    // writes part of its output before it has read all of its input would
+    // otherwise corrupt its own coefficients.
+    for (j = 0; j < 32; ++j) {
+      col_in[j] = out[i][j];
+    }
+    IHT_32[tx_type].cols(col_in, out[i]);
+  }
+
+  // May redirect dest/outp and their strides depending on tx_type
+  // (helper is defined elsewhere in this file).
+  maybe_flip_strides(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+  // Sum with the destination
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      // out is stored transposed, hence the swapped i/j on the source side.
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = clip_pixel_add(dest[d], ROUND_POWER_OF_TWO(outp[s], 6));
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
+
// idct
void vp10_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob) {
vp10_idct32x32_add(input, dest, stride, eob);
break;
#if CONFIG_EXT_TX
- case IDTX:
- inv_idtx_add_c(input, dest, stride, 32);
- break;
-#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- assert(0);
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ vp10_iht32x32_1024_add_c(input, dest, stride, tx_type);
break;
+ case IDTX:
+ inv_idtx_add_c(input, dest, stride, 32);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
}
}
+#if CONFIG_EXT_TX
+// High-bitdepth 32x32 inverse hybrid transform, added into the destination.
+//
+// input   : 32 rows of 32 coefficients, stored contiguously.
+// dest8   : destination block; converted with CONVERT_TO_SHORTPTR() and
+//           accessed as 16-bit samples.
+// stride  : distance between consecutive dest rows, in samples.
+// tx_type : index into HIGH_IHT_32 selecting the 1-D transform pair; the
+//           table has 16 entries (DCT_DCT .. DST_DST), so callers must not
+//           pass a value outside that range.
+// bd      : bit depth, forwarded to the 1-D transforms and to the final
+//           clipping.
+//
+// Steps: 1-D transform of every row, in-place transpose, 1-D transform of
+// every (now row-stored) column, optional flip handled by
+// maybe_flip_strides16(), then rounding by 6 bits and accumulation into
+// dest.
+void vp10_highbd_iht32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
+                                     int stride, int tx_type, int bd) {
+  static const highbd_transform_2d HIGH_IHT_32[] = {
+    { vpx_highbd_idct32_c, vpx_highbd_idct32_c },        // DCT_DCT
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c },      // ADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },      // DCT_ADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },    // ADST_ADST
+    { highbd_ihalfright32_c, vpx_highbd_idct32_c },      // FLIPADST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfright32_c },      // DCT_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },    // FLIPADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },    // ADST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfright32_c },    // FLIPADST_ADST
+    { highbd_ihalfcenter32_c, vpx_highbd_idct32_c },     // DST_DCT
+    { vpx_highbd_idct32_c, highbd_ihalfcenter32_c },     // DCT_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfright32_c },   // DST_ADST
+    { highbd_ihalfright32_c, highbd_ihalfcenter32_c },   // ADST_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfright32_c },   // DST_FLIPADST
+    { highbd_ihalfright32_c, highbd_ihalfcenter32_c },   // FLIPADST_DST
+    { highbd_ihalfcenter32_c, highbd_ihalfcenter32_c },  // DST_DST
+  };
+
+  uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
+
+  int i, j;
+  tran_low_t tmp;
+  tran_low_t col_in[32];
+  tran_low_t out[32][32];
+  tran_low_t *outp = &out[0][0];
+  int outstride = 32;
+
+  // inverse transform row vectors
+  for (i = 0; i < 32; ++i) {
+    HIGH_IHT_32[tx_type].rows(input, out[i], bd);
+    input += 32;
+  }
+
+  // transpose
+  for (i = 1; i < 32; i++) {
+    for (j = 0; j < i; j++) {
+      tmp = out[i][j];
+      out[i][j] = out[j][i];
+      out[j][i] = tmp;
+    }
+  }
+
+  // inverse transform column vectors
+  for (i = 0; i < 32; ++i) {
+    // Feed the 1-D transform from a private copy instead of passing the
+    // same buffer as both the const input and the output: a transform that
+    // writes part of its output before it has read all of its input would
+    // otherwise corrupt its own coefficients.
+    for (j = 0; j < 32; ++j) {
+      col_in[j] = out[i][j];
+    }
+    HIGH_IHT_32[tx_type].cols(col_in, out[i], bd);
+  }
+
+  // May redirect dest/outp and their strides depending on tx_type
+  // (helper is defined elsewhere in this file).
+  maybe_flip_strides16(&dest, &stride, &outp, &outstride, tx_type, 32);
+
+  // Sum with the destination
+  for (i = 0; i < 32; ++i) {
+    for (j = 0; j < 32; ++j) {
+      // out is stored transposed, hence the swapped i/j on the source side.
+      int d = i * stride + j;
+      int s = j * outstride + i;
+      dest[d] = highbd_clip_pixel_add(dest[d],
+                                      ROUND_POWER_OF_TWO(outp[s], 6), bd);
+    }
+  }
+}
+#endif // CONFIG_EXT_TX
+
// idct
void vp10_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
int eob, int bd) {
vp10_highbd_idct32x32_add(input, dest, stride, eob, bd);
break;
#if CONFIG_EXT_TX
- case IDTX:
- highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
- break;
-#endif // CONFIG_EXT_TX
case ADST_DCT:
case DCT_ADST:
case ADST_ADST:
- assert(0);
+ case FLIPADST_DCT:
+ case DCT_FLIPADST:
+ case FLIPADST_FLIPADST:
+ case ADST_FLIPADST:
+ case FLIPADST_ADST:
+ case DST_DST:
+ case DST_DCT:
+ case DCT_DST:
+ case DST_ADST:
+ case ADST_DST:
+ case FLIPADST_DST:
+ case DST_FLIPADST:
+ vp10_highbd_iht32x32_1024_add_c(input, dest, stride, tx_type, bd);
break;
+ case IDTX:
+ highbd_inv_idtx_add_c(input, dest, stride, 32, bd);
+ break;
+#endif // CONFIG_EXT_TX
default:
assert(0);
break;
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht16x16 sse2/;
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4/, "$mmx_x86inc";
} else {
add_proto qw/void vp10_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_fht16x16 sse2 msa/;
+ add_proto qw/void vp10_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_fht32x32/;
+
add_proto qw/void vp10_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_fwht4x4 msa/, "$mmx_x86inc";
}
add_proto qw/void vp10_highbd_fht16x16/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
specialize qw/vp10_highbd_fht16x16/;
+ add_proto qw/void vp10_highbd_fht32x32/, "const int16_t *input, tran_low_t *output, int stride, int tx_type";
+ specialize qw/vp10_highbd_fht32x32/;
+
add_proto qw/void vp10_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vp10_highbd_fwht4x4/;
#include "./vp10_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
-
#include "vp10/common/blockd.h"
#include "vp10/common/idct.h"
#include "vpx_dsp/fwd_txfm.h"
range_check(output, 16, 16);
}
-/* TODO(angiebird): Unify this with vp10_fwd_txfm.c: vp10_fdct32
+#if CONFIG_EXT_TX
static void fdct32(const tran_low_t *input, tran_low_t *output) {
tran_high_t temp;
tran_low_t step[32];
range_check(output, 32, 18);
}
-*/
+#endif // CONFIG_EXT_TX
static void fadst4(const tran_low_t *input, tran_low_t *output) {
tran_high_t x0, x1, x2, x3;
}
#if CONFIG_EXT_TX
+// For use in lieu of DST.
+//
+// 32-point forward "half-center" transform: the eight samples at each end
+// of the input bypass the DCT (scaled by 4) and fill output[16..31], while
+// the 16 middle samples are scaled by sqrt(2) and sent through fdct16 into
+// output[0..15].
+static void fhalfcenter32(const tran_low_t *input, tran_low_t *output) {
+  tran_low_t scaled_mid[16];
+  int k;
+
+  // Pass-through of the outer samples: input[0..7] -> output[16..23],
+  // input[24..31] -> output[24..31].
+  for (k = 0; k < 8; ++k) {
+    output[16 + k] = input[k] * 4;
+    output[24 + k] = input[24 + k] * 4;
+  }
+
+  // Middle samples input[8..23], multiplied by sqrt(2).
+  for (k = 8; k < 24; ++k) {
+    scaled_mid[k - 8] = (tran_low_t)fdct_round_shift(input[k] * Sqrt2);
+  }
+
+  // Overall scaling factor is 4 times orthogonal.
+  fdct16(scaled_mid, output);
+}
+
+// For use in lieu of ADST.
+//
+// 32-point forward "half-right" transform: the first 16 input samples
+// bypass the DCT (scaled by 4) and fill output[16..31], while the last 16
+// samples are scaled by sqrt(2) and sent through fdct16 into output[0..15].
+static void fhalfright32(const tran_low_t *input, tran_low_t *output) {
+  const tran_low_t *const right = input + 16;
+  tran_low_t scaled_right[16];
+  int k;
+
+  // Pass-through of the left half: input[0..15] -> output[16..31].
+  for (k = 0; k < 16; ++k) {
+    output[16 + k] = input[k] * 4;
+  }
+
+  // Right half input[16..31], multiplied by sqrt(2).
+  for (k = 0; k < 16; ++k) {
+    scaled_right[k] = (tran_low_t)fdct_round_shift(right[k] * Sqrt2);
+  }
+
+  // Overall scaling factor is 4 times orthogonal.
+  fdct16(scaled_right, output);
+}
+
static void copy_block(const int16_t *src, int src_stride, int l,
int16_t *dest, int dest_stride) {
int i;
#endif // CONFIG_EXT_TX
};
+#if CONFIG_EXT_TX
+// 1-D transform pairs for the 32x32 forward hybrid transform, indexed by
+// tx_type (see vp10_fht32x32_c). The entry order must match the numeric
+// tx_type values noted beside each row. For 32-point transforms the DCT
+// slots use fdct32, the ADST/FLIPADST slots use fhalfright32, and the DST
+// slots use fhalfcenter32; the flipped variants share the entries of their
+// non-flipped counterparts.
+static const transform_2d FHT_32[] = {
+ { fdct32, fdct32 }, // DCT_DCT = 0,
+ { fhalfright32, fdct32 }, // ADST_DCT = 1,
+ { fdct32, fhalfright32 }, // DCT_ADST = 2,
+ { fhalfright32, fhalfright32 }, // ADST_ADST = 3,
+ { fhalfright32, fdct32 }, // FLIPADST_DCT = 4,
+ { fdct32, fhalfright32 }, // DCT_FLIPADST = 5,
+ { fhalfright32, fhalfright32 }, // FLIPADST_FLIPADST = 6,
+ { fhalfright32, fhalfright32 }, // ADST_FLIPADST = 7,
+ { fhalfright32, fhalfright32 }, // FLIPADST_ADST = 8,
+ { fhalfcenter32, fdct32 }, // DST_DCT = 9,
+ { fdct32, fhalfcenter32 }, // DCT_DST = 10,
+ { fhalfcenter32, fhalfright32 }, // DST_ADST = 11,
+ { fhalfright32, fhalfcenter32 }, // ADST_DST = 12,
+ { fhalfcenter32, fhalfright32 }, // DST_FLIPADST = 13,
+ { fhalfright32, fhalfcenter32 }, // FLIPADST_DST = 14,
+ { fhalfcenter32, fhalfcenter32 }, // DST_DST = 15
+};
+#endif // CONFIG_EXT_TX
+
void vp10_fht4x4_c(const int16_t *input, tran_low_t *output,
int stride, int tx_type) {
if (tx_type == DCT_DCT) {
vp10_fht16x16_c(input, output, stride, tx_type);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_EXT_TX
+// 32x32 forward hybrid transform (C reference).
+//
+// input   : residual block, read as input[row * stride + col].
+// output  : 32x32 coefficients, written row-major with a pitch of 32.
+// stride  : distance between consecutive input rows, in samples.
+// tx_type : DCT_DCT is delegated to vpx_fdct32x32_c(); any other value
+//           selects a 1-D transform pair from FHT_32.
+void vp10_fht32x32_c(const int16_t *input, tran_low_t *output,
+                     int stride, int tx_type) {
+  if (tx_type != DCT_DCT) {
+    const transform_2d *const xform = &FHT_32[tx_type];
+    tran_low_t line_in[32], line_out[32];
+    tran_low_t intermediate[32 * 32];
+    int16_t flipped_input[32 * 32];
+    int row, col;
+
+    // May repoint input/stride at flipped_input depending on tx_type
+    // (helper is defined elsewhere in this file).
+    maybe_flip_input(&input, &stride, 32, flipped_input, tx_type);
+
+    // Column pass: pre-scale by 4, transform, then divide by 4 with the
+    // rounding offset biased for positive values.
+    for (col = 0; col < 32; ++col) {
+      for (row = 0; row < 32; ++row) {
+        line_in[row] = input[row * stride + col] * 4;
+      }
+      xform->cols(line_in, line_out);
+      for (row = 0; row < 32; ++row) {
+        const tran_low_t v = line_out[row];
+        intermediate[row * 32 + col] = (v + 1 + (v > 0)) >> 2;
+      }
+    }
+
+    // Row pass: transform, then divide by 4 with the rounding offset biased
+    // for negative values.
+    for (row = 0; row < 32; ++row) {
+      for (col = 0; col < 32; ++col) {
+        line_in[col] = intermediate[row * 32 + col];
+      }
+      xform->rows(line_in, line_out);
+      for (col = 0; col < 32; ++col) {
+        const tran_low_t v = line_out[col];
+        output[row * 32 + col] = (tran_low_t)((v + 1 + (v < 0)) >> 2);
+      }
+    }
+  } else {
+    vpx_fdct32x32_c(input, output, stride);
+  }
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+// High-bitdepth entry point for the 32x32 forward hybrid transform. It takes
+// the same arguments as vp10_fht32x32_c() and forwards them unchanged; there
+// is no separate high-bitdepth code path here.
+void vp10_highbd_fht32x32_c(const int16_t *input, tran_low_t *output,
+ int stride, int tx_type) {
+ vp10_fht32x32_c(input, output, stride, tx_type);
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_EXT_TX
}
}
-static void highbd_idct32_c(const tran_low_t *input,
- tran_low_t *output, int bd) {
+void vpx_highbd_idct32_c(const tran_low_t *input,
+ tran_low_t *output, int bd) {
tran_low_t step1[32], step2[32];
tran_high_t temp1, temp2;
(void) bd;
zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
if (zero_coeff[0] | zero_coeff[1])
- highbd_idct32_c(input, outptr, bd);
+ vpx_highbd_idct32_c(input, outptr, bd);
else
memset(outptr, 0, sizeof(tran_low_t) * 32);
input += 32;
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- highbd_idct32_c(temp_in, temp_out, bd);
+ vpx_highbd_idct32_c(temp_in, temp_out, bd);
for (j = 0; j < 32; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
// Rows
// Only upper-left 8x8 has non-zero coeff.
for (i = 0; i < 8; ++i) {
- highbd_idct32_c(input, outptr, bd);
+ vpx_highbd_idct32_c(input, outptr, bd);
input += 32;
outptr += 32;
}
for (i = 0; i < 32; ++i) {
for (j = 0; j < 32; ++j)
temp_in[j] = out[j * 32 + i];
- highbd_idct32_c(temp_in, temp_out, bd);
+ vpx_highbd_idct32_c(temp_in, temp_out, bd);
for (j = 0; j < 32; ++j) {
dest[j * stride + i] = highbd_clip_pixel_add(
dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd);
+void vpx_highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd);
void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd);
static const tran_high_t cospi_30_64 = 1606;
static const tran_high_t cospi_31_64 = 804;
-// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
+// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
static const tran_high_t sinpi_1_9 = 5283;
static const tran_high_t sinpi_2_9 = 9929;
static const tran_high_t sinpi_3_9 = 13377;
static const tran_high_t sinpi_4_9 = 15212;
+// 16384 * sqrt(2)
+static const tran_high_t Sqrt2 = 23170;
+
#endif // VPX_DSP_TXFM_COMMON_H_