io_short[7] = _mm_packs_epi32(io[11], io[15]);
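// Each of the two 1-D passes below applies either the 8-point inverse DCT or
// the 8-point inverse ADST, selected by tx_type.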
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
} else {
iadst8_sse2(io_short);
}
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
} else {
iadst8_sse2(io_short);
}
__m128i temp[4];
if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
- highbd_idct8x8_half1d(io);
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
} else {
highbd_iadst8_sse4_1(io);
highbd_iadst8_sse4_1(&io[8]);
io[7] = io[11];
if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
- highbd_idct8x8_half1d(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
io[8] = temp[0];
io[9] = temp[1];
io[10] = temp[2];
io[11] = temp[3];
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
} else {
highbd_iadst8_sse4_1(io);
io[8] = temp[0];
switch (tx_type) {
case 0: // DCT_DCT
- idct8_sse2(in);
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
case 1: // ADST_DCT
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
iadst8_sse2(in);
break;
case 2: // DCT_ADST
iadst8_sse2(in);
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
case 3: // ADST_ADST
iadst8_sse2(in);
io_short[6] = _mm_packs_epi32(io[10], io[14]);
io_short[7] = _mm_packs_epi32(io[11], io[15]);
- idct8_sse2(io_short);
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
round_shift_8x8(io_short, io);
} else {
__m128i temp[4];
#include "vpx_dsp/x86/inv_txfm_ssse3.h"
#include "vpx_dsp/x86/transpose_sse2.h"
-void highbd_idct8x8_half1d(__m128i *const io) {
+void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io) {
__m128i step1[8], step2[8];
transpose_32bit_4x4x2(io, io);
io_short[6] = _mm_packs_epi32(io[10], io[14]);
io_short[7] = _mm_packs_epi32(io[11], io[15]);
- idct8_sse2(io_short);
- idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
+ vpx_idct8_sse2(io_short);
round_shift_8x8(io_short, io);
} else {
__m128i temp[4];
- highbd_idct8x8_half1d(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
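// Load the remaining input rows before transforming the second half of the
// block.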
io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
temp[0] = io[4];
temp[1] = io[5];
io[5] = io[9];
io[6] = io[10];
io[7] = io[11];
- highbd_idct8x8_half1d(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
io[8] = temp[0];
io[9] = temp[1];
io[10] = temp[2];
io[11] = temp[3];
- highbd_idct8x8_half1d(&io[8]);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
highbd_idct8x8_final_round(io);
}
io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3]
}
-void highbd_idct8x8_half1d(__m128i *const io);
+void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io);
#endif // VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_
// 2-D
for (i = 0; i < 2; i++) {
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
}
write_buffer_8x8(in, dest, stride);
recon_and_store_8_dual(dest, dc_value, stride);
}
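// Each call transposes the 8x8 block of 16-bit coefficients and then applies
// the 8-point 1-D inverse DCT, so two consecutive calls (as in the 2-D loop
// above) produce the full 2-D transform.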
-void idct8_sse2(__m128i *const in) {
+void vpx_idct8_sse2(__m128i *const in) {
// 8x8 Transpose is copied from vpx_fdct8x8_sse2()
transpose_16bit_8x8(in, in);
}
void idct4_sse2(__m128i *const in);
-void idct8_sse2(__m128i *const in);
+void vpx_idct8_sse2(__m128i *const in);
void idct16_sse2(__m128i *const in0, __m128i *const in1);
void iadst4_sse2(__m128i *const in);
void iadst8_sse2(__m128i *const in);
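/*
 * A minimal sketch, not part of the change: idct8x8_16bit_2d is a hypothetical
 * helper showing how the renamed vpx_idct8_sse2() is used on the bd == 8 path
 * above. It assumes the 64 coefficients are already packed into eight 16-bit
 * vectors (via _mm_packs_epi32 as shown earlier) and that inv_txfm_sse2.h and
 * the high bit-depth headers providing round_shift_8x8() are included.
 */
static void idct8x8_16bit_2d(__m128i io_short[8], __m128i io[16]) {
  vpx_idct8_sse2(io_short);       // first 1-D pass (transposes internally)
  vpx_idct8_sse2(io_short);       // second 1-D pass
  round_shift_8x8(io_short, io);  // final rounding shift into the 32-bit io[] layout
}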