From fa472129334e197f0a977fd8ccd0bf2a145d0935 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Wed, 12 Aug 2015 16:14:53 -0700 Subject: [PATCH] VPX: removed step checks from neon convolve code The check is handled by the predictor table. Change-Id: I42479f843e77a2d40cdcdfc9e2e6c48a05a36561 --- vpx_dsp/arm/vpx_convolve8_avg_neon.c | 26 +++------------------- vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm | 10 --------- vpx_dsp/arm/vpx_convolve8_neon.c | 26 +++------------------- vpx_dsp/arm/vpx_convolve8_neon_asm.asm | 10 --------- vpx_dsp/arm/vpx_convolve_neon.c | 22 +++++------------- 5 files changed, 12 insertions(+), 82 deletions(-) diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon.c b/vpx_dsp/arm/vpx_convolve8_avg_neon.c index 5464250e6..863225013 100644 --- a/vpx_dsp/arm/vpx_convolve8_avg_neon.c +++ b/vpx_dsp/arm/vpx_convolve8_avg_neon.c @@ -9,23 +9,13 @@ */ #include <arm_neon.h> +#include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" -void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - static INLINE int32x4_t MULTIPLY_BY_Q0( int16x4_t dsrc0, int16x4_t dsrc1, @@ -82,12 +72,7 @@ void vpx_convolve8_avg_horiz_neon( uint16x4x2_t d0x2u16, d1x2u16; uint32x4x2_t q0x2u32; - if (x_step_q4 != 16) { - vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, w, h); - return; -} + assert(x_step_q4 == 16); q0s16 = vld1q_s16(filter_x); @@ -271,12 +256,7 @@ void vpx_convolve8_avg_vert_neon( uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; int32x4_t q1s32, q2s32, q14s32, q15s32; - if 
(y_step_q4 != 16) { - vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, w, h); - return; - } + assert(y_step_q4 == 16); src -= src_stride * 3; q0s16 = vld1q_s16(filter_y); diff --git a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm index a19f97db7..e279d570f 100644 --- a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm +++ b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm @@ -19,8 +19,6 @@ EXPORT |vpx_convolve8_avg_horiz_neon| EXPORT |vpx_convolve8_avg_vert_neon| - IMPORT |vpx_convolve8_avg_horiz_c| - IMPORT |vpx_convolve8_avg_vert_c| ARM REQUIRE8 PRESERVE8 @@ -52,10 +50,6 @@ ; sp[]int h |vpx_convolve8_avg_horiz_neon| PROC - ldr r12, [sp, #4] ; x_step_q4 - cmp r12, #16 - bne vpx_convolve8_avg_horiz_c - push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps @@ -184,10 +178,6 @@ vpx_convolve8_avg_loop_horiz ENDP |vpx_convolve8_avg_vert_neon| PROC - ldr r12, [sp, #12] - cmp r12, #16 - bne vpx_convolve8_avg_vert_c - push {r4-r8, lr} ; adjust for taps diff --git a/vpx_dsp/arm/vpx_convolve8_neon.c b/vpx_dsp/arm/vpx_convolve8_neon.c index 6f634b3c7..9bd715e2c 100644 --- a/vpx_dsp/arm/vpx_convolve8_neon.c +++ b/vpx_dsp/arm/vpx_convolve8_neon.c @@ -9,23 +9,13 @@ */ #include <arm_neon.h> +#include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" -void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); -void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, - uint8_t *dst, ptrdiff_t dst_stride, - const int16_t *filter_x, int x_step_q4, - const int16_t *filter_y, int y_step_q4, - int w, int h); - static INLINE int32x4_t MULTIPLY_BY_Q0( int16x4_t dsrc0, int16x4_t dsrc1, @@ -82,12 +72,7 @@ void vpx_convolve8_horiz_neon( uint16x4x2_t d0x2u16, d1x2u16; uint32x4x2_t q0x2u32; - if (x_step_q4 != 
16) { - vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, w, h); - return; - } + assert(x_step_q4 == 16); q0s16 = vld1q_s16(filter_x); @@ -255,12 +240,7 @@ void vpx_convolve8_vert_neon( uint16x8_t q1u16, q2u16, q8u16, q9u16, q10u16, q11u16, q12u16, q13u16; int32x4_t q1s32, q2s32, q14s32, q15s32; - if (y_step_q4 != 16) { - vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, w, h); - return; - } + assert(y_step_q4 == 16); src -= src_stride * 3; q0s16 = vld1q_s16(filter_y); diff --git a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm index bc530de41..2d0f2ae06 100644 --- a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm +++ b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm @@ -19,8 +19,6 @@ EXPORT |vpx_convolve8_horiz_neon| EXPORT |vpx_convolve8_vert_neon| - IMPORT |vpx_convolve8_horiz_c| - IMPORT |vpx_convolve8_vert_c| ARM REQUIRE8 PRESERVE8 @@ -52,10 +50,6 @@ ; sp[]int h |vpx_convolve8_horiz_neon| PROC - ldr r12, [sp, #4] ; x_step_q4 - cmp r12, #16 - bne vpx_convolve8_horiz_c - push {r4-r10, lr} sub r0, r0, #3 ; adjust for taps @@ -173,10 +167,6 @@ vpx_convolve8_loop_horiz ENDP |vpx_convolve8_vert_neon| PROC - ldr r12, [sp, #12] - cmp r12, #16 - bne vpx_convolve8_vert_c - push {r4-r8, lr} ; adjust for taps diff --git a/vpx_dsp/arm/vpx_convolve_neon.c b/vpx_dsp/arm/vpx_convolve_neon.c index 2c0f7e9e7..1506ce620 100644 --- a/vpx_dsp/arm/vpx_convolve_neon.c +++ b/vpx_dsp/arm/vpx_convolve_neon.c @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <assert.h> + #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" @@ -25,14 +27,8 @@ void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, // Account for the vertical phase needing 3 lines prior and 4 lines post int intermediate_height = h + 7; - if (x_step_q4 != 16 || y_step_q4 != 16) { - vpx_convolve8_c(src, src_stride, - dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - return; - } + assert(y_step_q4 == 16); + assert(x_step_q4 == 16); /* Filter starting 3 lines back. The neon implementation will ignore the * given height and filter a multiple of 4 lines. Since this goes in to @@ -59,14 +55,8 @@ void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, DECLARE_ALIGNED(8, uint8_t, temp[64 * 72]); int intermediate_height = h + 7; - if (x_step_q4 != 16 || y_step_q4 != 16) { - vpx_convolve8_avg_c(src, src_stride, - dst, dst_stride, - filter_x, x_step_q4, - filter_y, y_step_q4, - w, h); - return; - } + assert(y_step_q4 == 16); + assert(x_step_q4 == 16); /* This implementation has the same issues as above. In addition, we only want * to average the values after both passes. -- 2.40.0