neon,load_unaligned_*: use dup for lane 0

author James Zern <jzern@google.com>
Fri, 2 Sep 2022 01:47:50 +0000 (18:47 -0700)

committer James Zern <jzern@google.com>
Fri, 2 Sep 2022 01:47:50 +0000 (18:47 -0700)
diff --git a/vpx_dsp/arm/mem_neon.h b/vpx_dsp/arm/mem_neon.h

index 50aaa94fe0919d34b50696b9336ff783c1ff6544..84aae161b3cadb18c21138a3cafd6ca4adc2b54e 100644 (file)
--- a/vpx_dsp/arm/mem_neon.h
+++ b/vpx_dsp/arm/mem_neon.h
@@ -116,11 +116,11 @@ static INLINE void uint32_to_mem(uint8_t *buf, uint32_t a) {
  static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf,
                                            ptrdiff_t stride) {
    uint32_t a;
-  uint32x2_t a_u32 = vdup_n_u32(0);
+  uint32x2_t a_u32;
    if (stride == 4) return vld1_u8(buf);
    memcpy(&a, buf, 4);
    buf += stride;
-  a_u32 = vset_lane_u32(a, a_u32, 0);
+  a_u32 = vdup_n_u32(a);
    memcpy(&a, buf, 4);
    a_u32 = vset_lane_u32(a, a_u32, 1);
    return vreinterpret_u8_u32(a_u32);
@@ -143,11 +143,11 @@ static INLINE void store_unaligned_u8(uint8_t *buf, ptrdiff_t stride,
  static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf,
                                              ptrdiff_t stride) {
    uint32_t a;
-  uint32x4_t a_u32 = vdupq_n_u32(0);
+  uint32x4_t a_u32;
    if (stride == 4) return vld1q_u8(buf);
    memcpy(&a, buf, 4);
    buf += stride;
-  a_u32 = vsetq_lane_u32(a, a_u32, 0);
+  a_u32 = vdupq_n_u32(a);
    memcpy(&a, buf, 4);
    buf += stride;
    a_u32 = vsetq_lane_u32(a, a_u32, 1);
diff --git a/vpx_dsp/arm/sad4d_neon.c b/vpx_dsp/arm/sad4d_neon.c

index 03f716c3d58e62db1e3a9c827cc64bc4b9f79ad4..53866296ce8d064575218da6e5f210ca6dd3266b 100644 (file)
--- a/vpx_dsp/arm/sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -20,9 +20,9 @@
  static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0,
                                                   const void *const buf1) {
    uint32_t a;
-  uint32x2_t aa = vdup_n_u32(0);
+  uint32x2_t aa;
    memcpy(&a, buf0, 4);
-  aa = vset_lane_u32(a, aa, 0);
+  aa = vdup_n_u32(a);
    memcpy(&a, buf1, 4);
    aa = vset_lane_u32(a, aa, 1);
    return vreinterpret_u8_u32(aa);
author	James Zern <jzern@google.com>
	Fri, 2 Sep 2022 01:47:50 +0000 (18:47 -0700)
committer	James Zern <jzern@google.com>
	Fri, 2 Sep 2022 01:47:50 +0000 (18:47 -0700)
vpx_dsp/arm/mem_neon.h		patch | blob | history
vpx_dsp/arm/sad4d_neon.c		patch | blob | history