extern "C" {
#include "vp9/common/vp9_entropy.h"
#include "./vp9_rtcd.h"
-void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
+void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *output, int pitch);
}
#include "vpx/vpx_integer.h"
// Instantiates the FwdTrans8x8DCT parameterized test under the "C" prefix.
// The single parameter tuple pairs the C forward 8x8 DCT function with the C
// inverse-and-add function; the '-'/'+' lines differ only in the name of the
// inverse function. The trailing 0 is a third test parameter whose meaning is
// defined by the fixture (not visible here).
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));
+ make_tuple(&vp9_short_fdct8x8_c, &vp9_idct8x8_64_add_c, 0)));
INSTANTIATE_TEST_CASE_P(
C, FwdTrans8x8HT,
::testing::Values(
// Instantiates the FwdTrans8x8DCT parameterized test under the "SSE2" prefix.
// The single parameter tuple pairs the SSE2 forward 8x8 DCT function with the
// SSE2 inverse-and-add function; the '-'/'+' lines differ only in the name of
// the inverse function. The trailing 0 is a third test parameter whose meaning
// is defined by the fixture (not visible here).
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8DCT,
::testing::Values(
- make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));
+ make_tuple(&vp9_short_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0)));
INSTANTIATE_TEST_CASE_P(
SSE2, FwdTrans8x8HT,
::testing::Values(
reference_dct_2d(input, output_r);
for (int j = 0; j < 64; ++j)
coeff[j] = round(output_r[j]);
- vp9_short_idct8x8_add_c(coeff, dst, 8);
+ vp9_idct8x8_64_add_c(coeff, dst, 8);
for (int j = 0; j < 64; ++j) {
const int diff = dst[j] - src[j];
const int error = diff * diff;
;
- EXPORT |vp9_short_idct8x8_1_add_neon|
+ EXPORT |vp9_idct8x8_1_add_neon|
ARM
REQUIRE8
PRESERVE8
AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp9_short_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
+;void vp9_idct8x8_1_add_neon(int16_t *input, uint8_t *dest,
; int dest_stride)
;
; r0 int16_t *input
; r1 uint8_t *dest
; r2 int dest_stride
-|vp9_short_idct8x8_1_add_neon| PROC
+|vp9_idct8x8_1_add_neon| PROC
ldrsh r0, [r0]
; generate cospi_16_64 = 11585
vst1.64 {d31}, [r12], r2
bx lr
- ENDP ; |vp9_short_idct8x8_1_add_neon|
+ ENDP ; |vp9_idct8x8_1_add_neon|
END
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vp9_short_idct8x8_add_neon|
- EXPORT |vp9_short_idct8x8_10_add_neon|
+ EXPORT |vp9_idct8x8_64_add_neon|
+ EXPORT |vp9_idct8x8_10_add_neon|
ARM
REQUIRE8
PRESERVE8
MEND
AREA Block, CODE, READONLY ; name this block of code
-;void vp9_short_idct8x8_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_idct8x8_64_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t *input
; r1 uint8_t *dest
; r2 int dest_stride
-|vp9_short_idct8x8_add_neon| PROC
+|vp9_idct8x8_64_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_short_idct8x8_add_neon|
+ ENDP ; |vp9_idct8x8_64_add_neon|
-;void vp9_short_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
+;void vp9_idct8x8_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride)
;
; r0 int16_t *input
; r1 uint8_t *dest
; r2 int dest_stride
-|vp9_short_idct8x8_10_add_neon| PROC
+|vp9_idct8x8_10_add_neon| PROC
push {r4-r9}
vpush {d8-d15}
vld1.s16 {q8,q9}, [r0]!
vpop {d8-d15}
pop {r4-r9}
bx lr
- ENDP ; |vp9_short_idct8x8_10_add_neon|
+ ENDP ; |vp9_idct8x8_10_add_neon|
END
output[7] = step1[0] - step1[7];
}
-void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct8x8_64_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
int16_t out[8 * 8];
int16_t *outptr = out;
int i, j;
}
}
-void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
+void vp9_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
int i, j;
int a1;
int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
+ dest[j * dest_stride + i]); }
}
-void vp9_short_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
+void vp9_idct8x8_10_add_c(int16_t *input, uint8_t *dest,
int dest_stride) {
int16_t out[8 * 8] = { 0 };
int16_t *outptr = out;
vp9_iwht4x4_1_add(input, dest, stride);
}
// Dispatches to one of the 8x8 inverse-DCT "add" routines according to eob,
// forwarding input, dest and stride unchanged:
//   eob == 0             -> no routine is called
//   eob == 1             -> vp9_idct8x8_1_add
//   any other eob <= 10  -> vp9_idct8x8_10_add
//   otherwise            -> vp9_idct8x8_64_add
// The '-' lines show the same dispatch under the previous
// vp9_short_idct8x8_* / vp9_idct_add_8x8 names; the control flow is unchanged.
-void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob) {
+void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob) {
// If dc is 1, then input[0] is the reconstructed value, do not need
// dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
// A zero eob skips every call below.
if (eob) {
if (eob == 1)
// DC only DCT coefficient
- vp9_short_idct8x8_1_add(input, dest, stride);
+ vp9_idct8x8_1_add(input, dest, stride);
else if (eob <= 10)
- vp9_short_idct8x8_10_add(input, dest, stride);
+ vp9_idct8x8_10_add(input, dest, stride);
else
- vp9_short_idct8x8_add(input, dest, stride);
+ vp9_idct8x8_64_add(input, dest, stride);
}
}
void vp9_iht_add_8x8(TX_TYPE tx_type, int16_t *input, uint8_t *dest,
int stride, int eob) {
if (tx_type == DCT_DCT) {
- vp9_idct_add_8x8(input, dest, stride, eob);
+ vp9_idct8x8_add(input, dest, stride, eob);
} else {
if (eob > 0) {
vp9_short_iht8x8_add(input, dest, stride, tx_type);
void vp9_idct4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_iwht4x4_add(int16_t *input, uint8_t *dest, int stride, int eob);
-void vp9_idct_add_8x8(int16_t *input, uint8_t *dest, int stride, int eob);
+void vp9_idct8x8_add(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct_add_16x16(int16_t *input, uint8_t *dest, int stride, int eob);
void vp9_idct_add_32x32(int16_t *input, uint8_t *dest, int stride, int eob);
prototype void vp9_idct4x4_16_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_idct4x4_16_add sse2 neon
-prototype void vp9_short_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct8x8_1_add sse2 neon
+prototype void vp9_idct8x8_1_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_idct8x8_1_add sse2 neon
-prototype void vp9_short_idct8x8_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct8x8_add sse2 neon
+prototype void vp9_idct8x8_64_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_idct8x8_64_add sse2 neon
-prototype void vp9_short_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
-specialize vp9_short_idct8x8_10_add sse2 neon
+prototype void vp9_idct8x8_10_add "int16_t *input, uint8_t *dest, int dest_stride"
+specialize vp9_idct8x8_10_add sse2 neon
prototype void vp9_short_idct16x16_1_add "int16_t *input, uint8_t *dest, int dest_stride"
specialize vp9_short_idct16x16_1_add sse2 neon
dest += stride; \
}
-void vp9_short_idct8x8_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_64_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
RECON_AND_STORE(dest, in7);
}
-void vp9_short_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_1_add_sse2(int16_t *input, uint8_t *dest, int stride) {
__m128i dc_value;
const __m128i zero = _mm_setzero_si128();
int a;
RECON_AND_STORE(dest, in[7]);
}
-void vp9_short_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
+void vp9_idct8x8_10_add_sse2(int16_t *input, uint8_t *dest, int stride) {
const __m128i zero = _mm_setzero_si128();
const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING);
const __m128i final_rounding = _mm_set1_epi16(1<<4);
vp9_idct_add_16x16(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
break;
case TX_8X8:
- vp9_idct_add_8x8(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
+ vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, pd->eobs[block]);
break;
case TX_4X4:
// this is like vp9_short_idct4x4 but has a special case around eob<=1