From 2c24f7178dd7dfe3e637d04b110eb91ae2fbb863 Mon Sep 17 00:00:00 2001 From: Johann Date: Fri, 9 Dec 2016 12:54:55 -0800 Subject: [PATCH] Move load_and_transpose to transpose_neon.h Allows for use outside the idcts without pulling in idct_neon.h Change-Id: I4a94c1af3dac3e1b5bc8296ec9eab0ddcc8cfecf --- vpx_dsp/arm/idct32x32_34_add_neon.c | 1 + vpx_dsp/arm/idct_neon.h | 24 ------------------------ vpx_dsp/arm/transpose_neon.h | 23 +++++++++++++++++++++++ 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/vpx_dsp/arm/idct32x32_34_add_neon.c b/vpx_dsp/arm/idct32x32_34_add_neon.c index 7b3560a13..b56deeea6 100644 --- a/vpx_dsp/arm/idct32x32_34_add_neon.c +++ b/vpx_dsp/arm/idct32x32_34_add_neon.c @@ -13,6 +13,7 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" +#include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" // Only for the first pass of the _34_ variant. Since it only uses values from diff --git a/vpx_dsp/arm/idct_neon.h b/vpx_dsp/arm/idct_neon.h index 5d6403730..d9a676cd1 100644 --- a/vpx_dsp/arm/idct_neon.h +++ b/vpx_dsp/arm/idct_neon.h @@ -120,30 +120,6 @@ static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16( return vcombine_s16(vrshrn_n_s32(temp_low, 14), vrshrn_n_s32(temp_high, 14)); } -static INLINE void load_and_transpose_s16_8x8(const int16_t *a, int a_stride, - int16x8_t *a0, int16x8_t *a1, - int16x8_t *a2, int16x8_t *a3, - int16x8_t *a4, int16x8_t *a5, - int16x8_t *a6, int16x8_t *a7) { - *a0 = vld1q_s16(a); - a += a_stride; - *a1 = vld1q_s16(a); - a += a_stride; - *a2 = vld1q_s16(a); - a += a_stride; - *a3 = vld1q_s16(a); - a += a_stride; - *a4 = vld1q_s16(a); - a += a_stride; - *a5 = vld1q_s16(a); - a += a_stride; - *a6 = vld1q_s16(a); - a += a_stride; - *a7 = vld1q_s16(a); - - transpose_s16_8x8(a0, a1, a2, a3, a4, a5, a6, a7); -} - // Shift the output down by 6 and add it to the destination buffer. static INLINE void add_and_store_u8_s16(const int16x8_t a0, const int16x8_t a1, const int16x8_t a2, const int16x8_t a3, diff --git a/vpx_dsp/arm/transpose_neon.h b/vpx_dsp/arm/transpose_neon.h index d0634fd0a..fb28c6ea5 100644 --- a/vpx_dsp/arm/transpose_neon.h +++ b/vpx_dsp/arm/transpose_neon.h @@ -936,4 +936,27 @@ static INLINE void transpose_u8_16x16( *o15 = e7.val[1]; } +static INLINE void load_and_transpose_s16_8x8(const int16_t *a, int a_stride, + int16x8_t *a0, int16x8_t *a1, + int16x8_t *a2, int16x8_t *a3, + int16x8_t *a4, int16x8_t *a5, + int16x8_t *a6, int16x8_t *a7) { + *a0 = vld1q_s16(a); + a += a_stride; + *a1 = vld1q_s16(a); + a += a_stride; + *a2 = vld1q_s16(a); + a += a_stride; + *a3 = vld1q_s16(a); + a += a_stride; + *a4 = vld1q_s16(a); + a += a_stride; + *a5 = vld1q_s16(a); + a += a_stride; + *a6 = vld1q_s16(a); + a += a_stride; + *a7 = vld1q_s16(a); + + transpose_s16_8x8(a0, a1, a2, a3, a4, a5, a6, a7); +} #endif // VPX_DSP_ARM_TRANSPOSE_NEON_H_ -- 2.40.0