From 15a36a0a0ddaa154c6ab3c3c32a86a71f07c640e Mon Sep 17 00:00:00 2001 From: Dmitry Kovalev Date: Thu, 26 Sep 2013 14:01:25 -0700 Subject: [PATCH] Renaming vp9_short_idct10_16x16 to vp9_short_idct16x16_10. Making function name consistent with vp9_short_idct16x16 and vp9_short_idct16x16_1. Change-Id: I70e54be9e6b9a1dddab0de470686591e96d05517 --- vp9/common/arm/neon/vp9_idct16x16_neon.c | 10 +++++----- .../arm/neon/vp9_short_idct16x16_add_neon.asm | 16 ++++++++-------- vp9/common/vp9_idct.c | 2 +- vp9/common/vp9_rtcd_defs.sh | 4 ++-- vp9/common/x86/vp9_idct_intrin_sse2.c | 2 +- vp9/decoder/vp9_idct_blk.c | 2 +- vp9/encoder/vp9_encodemb.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/vp9/common/arm/neon/vp9_idct16x16_neon.c b/vp9/common/arm/neon/vp9_idct16x16_neon.c index 3e3e400a4..fddf902d0 100644 --- a/vp9/common/arm/neon/vp9_idct16x16_neon.c +++ b/vp9/common/arm/neon/vp9_idct16x16_neon.c @@ -20,10 +20,10 @@ extern void vp9_short_idct16x16_add_neon_pass2(int16_t *src, int16_t skip_adding, uint8_t *dest, int dest_stride); -extern void vp9_short_idct10_16x16_add_neon_pass1(int16_t *input, +extern void vp9_short_idct16x16_10_add_neon_pass1(int16_t *input, int16_t *output, int output_stride); -extern void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, +extern void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src, int16_t *output, int16_t *pass1Output, int16_t skip_adding, @@ -107,7 +107,7 @@ void vp9_short_idct16x16_add_neon(int16_t *input, return; } -void vp9_short_idct10_16x16_add_neon(int16_t *input, +void vp9_short_idct16x16_10_add_neon(int16_t *input, uint8_t *dest, int dest_stride) { int16_t pass1_output[16*16] = {0}; int16_t row_idct_output[16*16] = {0}; @@ -118,12 +118,12 @@ void vp9_short_idct10_16x16_add_neon(int16_t *input, /* Parallel idct on the upper 8 rows */ // First pass processes even elements 0, 2, 4, 6, 8, 10, 12, 14 and save the // stage 6 result in pass1_output. - vp9_short_idct10_16x16_add_neon_pass1(input, pass1_output, 8); + vp9_short_idct16x16_10_add_neon_pass1(input, pass1_output, 8); // Second pass processes odd elements 1, 3, 5, 7, 9, 11, 13, 15 and combines // with result in pass1(pass1_output) to calculate final result in stage 7 // which will be saved into row_idct_output. - vp9_short_idct10_16x16_add_neon_pass2(input+1, + vp9_short_idct16x16_10_add_neon_pass2(input+1, row_idct_output, pass1_output, 0, diff --git a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm index 7464e800f..856022bbf 100644 --- a/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm +++ b/vp9/common/arm/neon/vp9_short_idct16x16_add_neon.asm @@ -10,8 +10,8 @@ EXPORT |vp9_short_idct16x16_add_neon_pass1| EXPORT |vp9_short_idct16x16_add_neon_pass2| - EXPORT |vp9_short_idct10_16x16_add_neon_pass1| - EXPORT |vp9_short_idct10_16x16_add_neon_pass2| + EXPORT |vp9_short_idct16x16_10_add_neon_pass1| + EXPORT |vp9_short_idct16x16_10_add_neon_pass2| EXPORT |save_neon_registers| EXPORT |restore_neon_registers| ARM @@ -788,7 +788,7 @@ end_idct16x16_pass2 bx lr ENDP ; |vp9_short_idct16x16_add_neon_pass2| -;void |vp9_short_idct10_16x16_add_neon_pass1|(int16_t *input, +;void |vp9_short_idct16x16_10_add_neon_pass1|(int16_t *input, ; int16_t *output, int output_stride) ; ; r0 int16_t input @@ -798,7 +798,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage6 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_short_idct10_16x16_add_neon_pass1| PROC +|vp9_short_idct16x16_10_add_neon_pass1| PROC ; TODO(hkuang): Find a better way to load the elements. ; load elements of 0, 2, 4, 6, 8, 10, 12, 14 into q8 - q15 @@ -907,9 +907,9 @@ end_idct16x16_pass2 vst1.64 {d31}, [r1], r2 bx lr - ENDP ; |vp9_short_idct10_16x16_add_neon_pass1| + ENDP ; |vp9_short_idct16x16_10_add_neon_pass1| -;void vp9_short_idct10_16x16_add_neon_pass2(int16_t *src, +;void vp9_short_idct16x16_10_add_neon_pass2(int16_t *src, ; int16_t *output, ; int16_t *pass1Output, ; int16_t skip_adding, @@ -926,7 +926,7 @@ end_idct16x16_pass2 ; idct16 stage1 - stage7 on all the elements loaded in q8-q15. The output ; will be stored back into q8-q15 registers. This function will touch q0-q7 ; registers and use them as buffer during calculation. -|vp9_short_idct10_16x16_add_neon_pass2| PROC +|vp9_short_idct16x16_10_add_neon_pass2| PROC push {r3-r9} ; TODO(hkuang): Find a better way to load the elements. @@ -1177,7 +1177,7 @@ end_idct16x16_pass2 end_idct10_16x16_pass2 pop {r3-r9} bx lr - ENDP ; |vp9_short_idct10_16x16_add_neon_pass2| + ENDP ; |vp9_short_idct16x16_10_add_neon_pass2| ;void |save_neon_registers|() |save_neon_registers| PROC vpush {d8-d15} diff --git a/vp9/common/vp9_idct.c b/vp9/common/vp9_idct.c index f06bf047b..9975d3678 100644 --- a/vp9/common/vp9_idct.c +++ b/vp9/common/vp9_idct.c @@ -838,7 +838,7 @@ void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride, + dest[j * dest_stride + i]); } } -void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest, +void vp9_short_idct16x16_10_add_c(int16_t *input, uint8_t *dest, int dest_stride) { int16_t out[16 * 16] = { 0 }; int16_t *outptr = out; diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index 042afbbef..52bcbcf84 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -315,8 +315,8 @@ specialize vp9_short_idct16x16_1_add sse2 neon prototype void vp9_short_idct16x16_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct16x16_add sse2 neon -prototype void vp9_short_idct10_16x16_add "int16_t *input, uint8_t *dest, int dest_stride" -specialize vp9_short_idct10_16x16_add sse2 neon +prototype void vp9_short_idct16x16_10_add "int16_t *input, uint8_t *dest, int dest_stride" +specialize vp9_short_idct16x16_10_add sse2 neon prototype void vp9_short_idct32x32_add "int16_t *input, uint8_t *dest, int dest_stride" specialize vp9_short_idct32x32_add sse2 neon diff --git a/vp9/common/x86/vp9_idct_intrin_sse2.c b/vp9/common/x86/vp9_idct_intrin_sse2.c index 8f740f412..f97a6f5bf 100644 --- a/vp9/common/x86/vp9_idct_intrin_sse2.c +++ b/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -2456,7 +2456,7 @@ void vp9_short_iht16x16_add_sse2(int16_t *input, uint8_t *dest, int stride, write_buffer_8x16(dest, in1, stride); } -void vp9_short_idct10_16x16_add_sse2(int16_t *input, uint8_t *dest, +void vp9_short_idct16x16_10_add_sse2(int16_t *input, uint8_t *dest, int stride) { const __m128i rounding = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i final_rounding = _mm_set1_epi16(1<<5); diff --git a/vp9/decoder/vp9_idct_blk.c b/vp9/decoder/vp9_idct_blk.c index 395e636b8..00d2751bd 100644 --- a/vp9/decoder/vp9_idct_blk.c +++ b/vp9/decoder/vp9_idct_blk.c @@ -126,7 +126,7 @@ void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { vp9_short_idct16x16_1_add(input, dest, stride); input[0] = 0; } else if (eob <= 10) { - vp9_short_idct10_16x16_add(input, dest, stride); + vp9_short_idct16x16_10_add(input, dest, stride); vpx_memset(input, 0, 512); } else { vp9_short_idct16x16_add(input, dest, stride); diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c index 6b9109c94..058bde66f 100644 --- a/vp9/encoder/vp9_encodemb.c +++ b/vp9/encoder/vp9_encodemb.c @@ -64,7 +64,7 @@ static void inverse_transform_b_16x16_add(int eob, if (eob <= 1) vp9_short_idct16x16_1_add(dqcoeff, dest, stride); else if (eob <= 10) - vp9_short_idct10_16x16_add(dqcoeff, dest, stride); + vp9_short_idct16x16_10_add(dqcoeff, dest, stride); else vp9_short_idct16x16_add(dqcoeff, dest, stride); } -- 2.40.0