From b1b4ba1bddf7290ff3e054e545e2d47dd70688ad Mon Sep 17 00:00:00 2001 From: Christian Duvivier Date: Thu, 26 Sep 2013 16:01:37 -0700 Subject: [PATCH] Properly save neon registers. Replace current code which corrupts the stack by duplicate of vp8 code to save and restore neon registers. Change-Id: Ibb0220b9aa985d10533befa0a455ebce57a2891a --- vp9/common/arm/neon/vp9_idct16x16_neon.c | 15 ++++---- vp9/common/arm/neon/vp9_save_reg_neon.asm | 36 +++++++++++++++++++ .../arm/neon/vp9_short_idct16x16_add_neon.asm | 12 ------- vp9/vp9_common.mk | 1 + 4 files changed, 46 insertions(+), 18 deletions(-) create mode 100644 vp9/common/arm/neon/vp9_save_reg_neon.asm diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c index fddf902d0..fb7b5cdc4 100644 --- a/vp9/common/arm/neon/vp9_idct16x16_neon.c +++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c @@ -29,17 +29,19 @@ extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src, int16_t skip_adding, uint8_t *dest, int dest_stride); -extern void save_neon_registers(); -extern void restore_neon_registers(); +/* For ARM NEON, d8-d15 are callee-saved registers, and need to be saved. */ +extern void vp9_push_neon(int64_t *store); +extern void vp9_pop_neon(int64_t *store); void vp9_short_idct16x16_add_neon(int16_t *input, uint8_t *dest, int dest_stride) { + int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; // save d8-d15 register values. - save_neon_registers(); + vp9_push_neon(store_reg); /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -102,18 +104,19 @@ void vp9_short_idct16x16_add_neon(int16_t *input, dest_stride); // restore d8-d15 register values. - restore_neon_registers(); + vp9_pop_neon(store_reg); return; } void vp9_short_idct16x16_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride) { + int64_t store_reg[8]; int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; // save d8-d15 register values. - save_neon_registers(); + vp9_push_neon(store_reg); /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the @@ -163,7 +166,7 @@ void vp9_short_idct16x16_10_add_neon(int16_t *input, dest_stride); // restore d8-d15 register values. - restore_neon_registers(); + vp9_pop_neon(store_reg); return; } diff --git a/vp9/common/arm/neon/vp9_save_reg_neon.asm b/vp9/common/arm/neon/vp9_save_reg_neon.asm new file mode 100644 index 000000000..71c3e7077 --- /dev/null +++ b/vp9/common/arm/neon/vp9_save_reg_neon.asm @@ -0,0 +1,36 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + + EXPORT |vp9_push_neon| + EXPORT |vp9_pop_neon| + + ARM + REQUIRE8 + PRESERVE8 + + AREA ||.text||, CODE, READONLY, ALIGN=2 + +|vp9_push_neon| PROC + vst1.i64 {d8, d9, d10, d11}, [r0]! + vst1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + ENDP + +|vp9_pop_neon| PROC + vld1.i64 {d8, d9, d10, d11}, [r0]! + vld1.i64 {d12, d13, d14, d15}, [r0]! + bx lr + + ENDP + + END + diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm index 856022bbf..df2a0526c 100644 --- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm @@ -12,8 +12,6 @@ EXPORT |vp9_short_idct16x16_add_neon_pass2| EXPORT |vp9_short_idct16x16_10_add_neon_pass1| EXPORT |vp9_short_idct16x16_10_add_neon_pass2| - EXPORT |save_neon_registers| - EXPORT |restore_neon_registers| ARM REQUIRE8 PRESERVE8 @@ -1178,14 +1176,4 @@ end_idct10_16x16_pass2 pop {r3-r9} bx lr ENDP ; |vp9_short_idct16x16_10_add_neon_pass2| -;void |save_neon_registers|() -|save_neon_registers| PROC - vpush {d8-d15} - bx lr - ENDP ; |save_registers| -;void |restore_neon_registers|() -|restore_neon_registers| PROC - vpop {d8-d15} - bx lr - ENDP ; |restore_registers| END diff --git a/vp9/vp9_common.mk b/vp9/vp9_common.mk index 67784749c..68403c337 100644 --- a/vp9/vp9_common.mk +++ b/vp9/vp9_common.mk @@ -108,5 +108,6 @@ VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_short_iht8x8_add_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_mb_lpf_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_copy_neon$(ASM) VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_avg_neon$(ASM) +VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_save_reg_neon$(ASM) $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.sh)) -- 2.40.0