VPX: removed step checks from neon convolve code

author Scott LaVarnway <slavarnway@google.com>

Wed, 12 Aug 2015 23:14:53 +0000 (16:14 -0700)

committer Johann <johannkoenig@google.com>

Wed, 12 Aug 2015 23:46:53 +0000 (16:46 -0700)
author Scott LaVarnway <slavarnway@google.com>
Wed, 12 Aug 2015 23:14:53 +0000 (16:14 -0700)
committer Johann <johannkoenig@google.com>
Wed, 12 Aug 2015 23:46:53 +0000 (16:46 -0700)
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c

index 5464250e673f3c2a7d3e1e368d568fa6c3b44e1a..8632250138c18b7f7ce86cac0892a76619857026 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_avg_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c
@@ -9,23 +9,13 @@
   */
  
  #include <arm_neon.h>
+#include <assert.h>
  
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "vpx/vpx_integer.h"
  #include "vpx_ports/mem.h"
  
-void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                               uint8_t *dst, ptrdiff_t dst_stride,
-                               const int16_t *filter_x, int x_step_q4,
-                               const int16_t *filter_y, int y_step_q4,
-                               int w, int h);
-
  static INLINE int32x4_t MULTIPLY_BY_Q0(
      int16x4_t dsrc0,
      int16x4_t dsrc1,
@@ -82,12 +72,7 @@ void vpx_convolve8_avg_horiz_neon(
    uint16x4x2_t d0x2u16, d1x2u16;
    uint32x4x2_t q0x2u32;
  
-  if (x_step_q4 != 16) {
-    vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride,
-                              filter_x, x_step_q4,
-                              filter_y, y_step_q4, w, h);
-    return;
-}
+  assert(x_step_q4 == 16);
  
    q0s16 = vld1q_s16(filter_x);
  
@@ -271,12 +256,7 @@ void vpx_convolve8_avg_vert_neon(
    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
    int32x4_t q1s32, q2s32, q14s32, q15s32;
  
-  if (y_step_q4 != 16) {
-    vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride,
-                             filter_x, x_step_q4,
-                             filter_y, y_step_q4, w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
  
    src -= src_stride * 3;
    q0s16 = vld1q_s16(filter_y);
diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm

index a19f97db7af2504e9f8ce7c03011b9c8e5d6998e..e279d570fc2acb1922edd61fa3132b6d4d7a83aa 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
@@ -19,8 +19,6 @@
  
      EXPORT  |vpx_convolve8_avg_horiz_neon|
      EXPORT  |vpx_convolve8_avg_vert_neon|
-    IMPORT  |vpx_convolve8_avg_horiz_c|
-    IMPORT  |vpx_convolve8_avg_vert_c|
      ARM
      REQUIRE8
      PRESERVE8
@@ -52,10 +50,6 @@
  ; sp[]int h
  
  |vpx_convolve8_avg_horiz_neon| PROC
-    ldr             r12, [sp, #4]           ; x_step_q4
-    cmp             r12, #16
-    bne             vpx_convolve8_avg_horiz_c
-
      push            {r4-r10, lr}
  
      sub             r0, r0, #3              ; adjust for taps
@@ -184,10 +178,6 @@ vpx_convolve8_avg_loop_horiz
      ENDP
  
  |vpx_convolve8_avg_vert_neon| PROC
-    ldr             r12, [sp, #12]
-    cmp             r12, #16
-    bne             vpx_convolve8_avg_vert_c
-
      push            {r4-r8, lr}
  
      ; adjust for taps
diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c

index 6f634b3c7685ac17d3c457fa069d705c83c98cc9..9bd715e2c630b2f65adeeb18195f6eea9b2685d8 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -9,23 +9,13 @@
   */
  
  #include <arm_neon.h>
+#include <assert.h>
  
  #include "./vpx_config.h"
  #include "./vpx_dsp_rtcd.h"
  #include "vpx/vpx_integer.h"
  #include "vpx_ports/mem.h"
  
-void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x, int x_step_q4,
-                           const int16_t *filter_y, int y_step_q4,
-                           int w, int h);
-
  static INLINE int32x4_t MULTIPLY_BY_Q0(
      int16x4_t dsrc0,
      int16x4_t dsrc1,
@@ -82,12 +72,7 @@ void vpx_convolve8_horiz_neon(
    uint16x4x2_t d0x2u16, d1x2u16;
    uint32x4x2_t q0x2u32;
  
-  if (x_step_q4 != 16) {
-    vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride,
-                          filter_x, x_step_q4,
-                          filter_y, y_step_q4, w, h);
-    return;
-  }
+  assert(x_step_q4 == 16);
  
    q0s16 = vld1q_s16(filter_x);
  
@@ -255,12 +240,7 @@ void vpx_convolve8_vert_neon(
    uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16;
    int32x4_t q1s32, q2s32, q14s32, q15s32;
  
-  if (y_step_q4 != 16) {
-    vpx_convolve8_vert_c(src, src_stride, dst, dst_stride,
-                         filter_x, x_step_q4,
-                         filter_y, y_step_q4, w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
  
    src -= src_stride * 3;
    q0s16 = vld1q_s16(filter_y);
diff --git a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm

index bc530de41a673d50662eb127da367b35bc1aeb23..2d0f2ae0657299676e942f1f155eaaaa4be5ca35 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
@@ -19,8 +19,6 @@
  
      EXPORT  |vpx_convolve8_horiz_neon|
      EXPORT  |vpx_convolve8_vert_neon|
-    IMPORT  |vpx_convolve8_horiz_c|
-    IMPORT  |vpx_convolve8_vert_c|
      ARM
      REQUIRE8
      PRESERVE8
@@ -52,10 +50,6 @@
  ; sp[]int h
  
  |vpx_convolve8_horiz_neon| PROC
-    ldr             r12, [sp, #4]           ; x_step_q4
-    cmp             r12, #16
-    bne             vpx_convolve8_horiz_c
-
      push            {r4-r10, lr}
  
      sub             r0, r0, #3              ; adjust for taps
@@ -173,10 +167,6 @@ vpx_convolve8_loop_horiz
      ENDP
  
  |vpx_convolve8_vert_neon| PROC
-    ldr             r12, [sp, #12]
-    cmp             r12, #16
-    bne             vpx_convolve8_vert_c
-
      push            {r4-r8, lr}
  
      ; adjust for taps
diff --git a/vpx_dsp/arm/vpx_convolve_neon.c b/vpx_dsp/arm/vpx_convolve_neon.c

index 2c0f7e9e7e1f8f3fa8782860a0d9be666de94600..1506ce6203de21ade9449453b47c94237cfa608b 100644 (file)
--- a/vpx_dsp/arm/vpx_convolve_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_neon.c
@@ -8,6 +8,8 @@
   *  be found in the AUTHORS file in the root of the source tree.
   */
  
+#include <assert.h>
+
  #include "./vpx_dsp_rtcd.h"
  #include "vpx_dsp/vpx_dsp_common.h"
  #include "vpx_ports/mem.h"
@@ -25,14 +27,8 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
    // Account for the vertical phase needing 3 lines prior and 4 lines post
    int intermediate_height = h + 7;
  
-  if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vpx_convolve8_c(src, src_stride,
-                    dst, dst_stride,
-                    filter_x, x_step_q4,
-                    filter_y, y_step_q4,
-                    w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
+  assert(x_step_q4 == 16);
  
    /* Filter starting 3 lines back. The neon implementation will ignore the
     * given height and filter a multiple of 4 lines. Since this goes in to
@@ -59,14 +55,8 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
    DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]);
    int intermediate_height = h + 7;
  
-  if (x_step_q4 != 16 || y_step_q4 != 16) {
-    vpx_convolve8_avg_c(src, src_stride,
-                        dst, dst_stride,
-                        filter_x, x_step_q4,
-                        filter_y, y_step_q4,
-                        w, h);
-    return;
-  }
+  assert(y_step_q4 == 16);
+  assert(x_step_q4 == 16);
  
    /* This implementation has the same issues as above. In addition, we only want
     * to average the values after both passes.
author	Scott LaVarnway <slavarnway@google.com>
	Wed, 12 Aug 2015 23:14:53 +0000 (16:14 -0700)
committer	Johann <johannkoenig@google.com>
	Wed, 12 Aug 2015 23:46:53 +0000 (16:46 -0700)
vpx_dsp/arm/vpx_convolve8_avg_neon.c		patch | blob | history
vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm		patch | blob | history
vpx_dsp/arm/vpx_convolve8_neon.c		patch | blob | history
vpx_dsp/arm/vpx_convolve8_neon_asm.asm		patch | blob | history
vpx_dsp/arm/vpx_convolve_neon.c		patch | blob | history