Move load_and_transpose to transpose_neon.h

author Johann <johannkoenig@google.com>

Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)

committer Johann <johannkoenig@google.com>

Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)
author Johann <johannkoenig@google.com>
Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)
committer Johann <johannkoenig@google.com>
Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)
diff --git a/vpx_dsp/arm/idct32x32_34_add_neon.c b/vpx_dsp/arm/idct32x32_34_add_neon.c

index 7b3560a130fae5c56cd0c3bc8b7b3c070c216d96..b56deeea6de5d382b301bbdda60d2b253a46ed8c 100644 (file)
--- a/vpx_dsp/arm/idct32x32_34_add_neon.c
+++ b/vpx_dsp/arm/idct32x32_34_add_neon.c
@@ -13,6 +13,7 @@
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
  #include "vpx_dsp/txfm_common.h"
  
  // Only for the first pass of the  _34_ variant. Since it only uses values from
diff --git a/vpx_dsp/arm/idct_neon.h b/vpx_dsp/arm/idct_neon.h

index 5d64037306d6d203b1a1f09e00077f92cc8a38fb..d9a676cd1e6e819276c425e582ec25ab1ebacc73 100644 (file)
--- a/vpx_dsp/arm/idct_neon.h
+++ b/vpx_dsp/arm/idct_neon.h
@@ -120,30 +120,6 @@ static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16(
    return vcombine_s16(vrshrn_n_s32(temp_low, 14), vrshrn_n_s32(temp_high, 14));
  }
  
-static INLINE void load_and_transpose_s16_8x8(const int16_t *a, int a_stride,
-                                              int16x8_t *a0, int16x8_t *a1,
-                                              int16x8_t *a2, int16x8_t *a3,
-                                              int16x8_t *a4, int16x8_t *a5,
-                                              int16x8_t *a6, int16x8_t *a7) {
-  *a0 = vld1q_s16(a);
-  a += a_stride;
-  *a1 = vld1q_s16(a);
-  a += a_stride;
-  *a2 = vld1q_s16(a);
-  a += a_stride;
-  *a3 = vld1q_s16(a);
-  a += a_stride;
-  *a4 = vld1q_s16(a);
-  a += a_stride;
-  *a5 = vld1q_s16(a);
-  a += a_stride;
-  *a6 = vld1q_s16(a);
-  a += a_stride;
-  *a7 = vld1q_s16(a);
-
-  transpose_s16_8x8(a0, a1, a2, a3, a4, a5, a6, a7);
-}
-
  // Shift the output down by 6 and add it to the destination buffer.
  static INLINE void add_and_store_u8_s16(const int16x8_t a0, const int16x8_t a1,
                                          const int16x8_t a2, const int16x8_t a3,
diff --git a/vpx_dsp/arm/transpose_neon.h b/vpx_dsp/arm/transpose_neon.h

index d0634fd0aa3edf562f427db6a0f570557db1751f..fb28c6ea5a1523e0720bebc242fd57fbddabe5b2 100644 (file)
--- a/vpx_dsp/arm/transpose_neon.h
+++ b/vpx_dsp/arm/transpose_neon.h
@@ -936,4 +936,27 @@ static INLINE void transpose_u8_16x16(
    *o15 = e7.val[1];
  }
  
+static INLINE void load_and_transpose_s16_8x8(const int16_t *a, int a_stride,
+                                              int16x8_t *a0, int16x8_t *a1,
+                                              int16x8_t *a2, int16x8_t *a3,
+                                              int16x8_t *a4, int16x8_t *a5,
+                                              int16x8_t *a6, int16x8_t *a7) {
+  *a0 = vld1q_s16(a);
+  a += a_stride;
+  *a1 = vld1q_s16(a);
+  a += a_stride;
+  *a2 = vld1q_s16(a);
+  a += a_stride;
+  *a3 = vld1q_s16(a);
+  a += a_stride;
+  *a4 = vld1q_s16(a);
+  a += a_stride;
+  *a5 = vld1q_s16(a);
+  a += a_stride;
+  *a6 = vld1q_s16(a);
+  a += a_stride;
+  *a7 = vld1q_s16(a);
+
+  transpose_s16_8x8(a0, a1, a2, a3, a4, a5, a6, a7);
+}
  #endif  // VPX_DSP_ARM_TRANSPOSE_NEON_H_
author	Johann <johannkoenig@google.com>
	Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)
committer	Johann <johannkoenig@google.com>
	Fri, 9 Dec 2016 20:54:55 +0000 (12:54 -0800)
vpx_dsp/arm/idct32x32_34_add_neon.c		patch | blob | history
vpx_dsp/arm/idct_neon.h		patch | blob | history
vpx_dsp/arm/transpose_neon.h		patch | blob | history