prototype void vp9_short_fdct4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct4x4 sse2
-prototype void vp9_short_fdct8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_fdct8x4 sse2
-
prototype void vp9_short_fdct32x32 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_fdct32x32 sse2
prototype void vp9_short_walsh4x4 "int16_t *InputData, int16_t *OutputData, int pitch"
specialize vp9_short_walsh4x4
-prototype void vp9_short_walsh8x4 "int16_t *InputData, int16_t *OutputData, int pitch"
-specialize vp9_short_walsh8x4
-
#
# Motion search
#
BLOCK_SIZE sb64_partitioning;
void (*fwd_txm4x4)(int16_t *input, int16_t *output, int pitch);
- void (*fwd_txm8x4)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm8x8)(int16_t *input, int16_t *output, int pitch);
void (*fwd_txm16x16)(int16_t *input, int16_t *output, int pitch);
void (*quantize_b_4x4)(MACROBLOCK *x, int b_idx, TX_TYPE tx_type,
}
}
// 8x4 forward DCT (C version), built from two calls to vp9_short_fdct4x4_c
// that share the same pitch.
//   input  - source samples; the second call reads starting 4 elements later.
//   output - destination; the second call writes starting 16 elements later.
//   pitch  - forwarded unchanged to both 4x4 calls.
// NOTE(review): presumably the two calls cover the left and right 4x4 halves
// of an 8-wide block, each producing 16 coefficients - confirm against
// vp9_short_fdct4x4_c.
-void vp9_short_fdct8x4_c(int16_t *input, int16_t *output, int pitch) {
- vp9_short_fdct4x4_c(input, output, pitch);
- vp9_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
-
static void fdct8(const int16_t *input, int16_t *output) {
/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
/*needs32*/ int t0, t1, t2, t3;
}
}
// 8x4 forward Walsh transform (C version), built from two calls to
// vp9_short_walsh4x4_c that share the same pitch.
//   input  - source samples; the second call reads starting 4 elements later.
//   output - destination; the second call writes starting 16 elements later.
//   pitch  - forwarded unchanged to both 4x4 calls.
// NOTE(review): presumably the two calls cover the left and right 4x4 halves
// of an 8-wide block - confirm against vp9_short_walsh4x4_c.
-void vp9_short_walsh8x4_c(int16_t *input, int16_t *output, int pitch) {
- vp9_short_walsh4x4_c(input, output, pitch);
- vp9_short_walsh4x4_c(input + 4, output + 16, pitch);
-}
-
-
// Rewrote to use same algorithm as others.
static void fdct16(const int16_t in[16], int16_t out[16]) {
/*canbe16*/ int step1[8];
static void switch_lossless_mode(VP9_COMP *cpi, int lossless) {
if (lossless) {
// printf("Switching to lossless\n");
- cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
cpi->mb.e_mbd.itxm_add = vp9_iwht4x4_add;
cpi->mb.optimize = 0;
cpi->common.tx_mode = ONLY_4X4;
} else {
// printf("Not lossless\n");
- cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
cpi->mb.e_mbd.itxm_add = vp9_idct4x4_add;
}
cpi->mb.fwd_txm16x16 = vp9_short_fdct16x16;
cpi->mb.fwd_txm8x8 = vp9_short_fdct8x8;
- cpi->mb.fwd_txm8x4 = vp9_short_fdct8x4;
cpi->mb.fwd_txm4x4 = vp9_short_fdct4x4;
if (cpi->oxcf.lossless || cpi->mb.e_mbd.lossless) {
- cpi->mb.fwd_txm8x4 = vp9_short_walsh8x4;
cpi->mb.fwd_txm4x4 = vp9_short_walsh4x4;
}
}
}
// 8x4 forward DCT (SSE2 version), built from two calls to
// vp9_short_fdct4x4_sse2 that share the same pitch.
//   input  - source samples; the second call reads starting 4 elements later.
//   output - destination; the second call writes starting 16 elements later.
//   pitch  - forwarded unchanged to both 4x4 calls.
// NOTE(review): presumably the two calls cover the left and right 4x4 halves
// of an 8-wide block - confirm against vp9_short_fdct4x4_sse2.
-void vp9_short_fdct8x4_sse2(int16_t *input, int16_t *output, int pitch) {
- vp9_short_fdct4x4_sse2(input, output, pitch);
- vp9_short_fdct4x4_sse2(input + 4, output + 16, pitch);
-}
-
static INLINE void load_buffer_4x4(int16_t *input, __m128i *in, int stride) {
const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);