From: hkuang Date: Fri, 10 Jan 2014 02:40:19 +0000 (-0800) Subject: Add vp9_tm_predictor_4x4 neon implementation X-Git-Tag: v1.4.0~2653^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f2ef3892564e8a2fd76a9ae1b9d19058572f713b;p=libvpx Add vp9_tm_predictor_4x4 neon implementation Change-Id: I10c423bde7ea5a3bac9f14f35c73b6bc31c8f3e3 --- diff --git a/vp9/common/arm/neon/vp9_reconintra_neon.asm b/vp9/common/arm/neon/vp9_reconintra_neon.asm index f106bc78e..98619bb30 100644 --- a/vp9/common/arm/neon/vp9_reconintra_neon.asm +++ b/vp9/common/arm/neon/vp9_reconintra_neon.asm @@ -16,6 +16,7 @@ EXPORT |vp9_h_predictor_8x8_neon| EXPORT |vp9_h_predictor_16x16_neon| EXPORT |vp9_h_predictor_32x32_neon| + EXPORT |vp9_tm_predictor_4x4_neon| ARM REQUIRE8 PRESERVE8 @@ -283,4 +284,52 @@ loop_h bx lr ENDP ; |vp9_h_predictor_32x32_neon| +;void vp9_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, +; const uint8_t *above, +; const uint8_t *left) +; r0 uint8_t *dst +; r1 ptrdiff_t y_stride +; r2 const uint8_t *above +; r3 const uint8_t *left + +|vp9_tm_predictor_4x4_neon| PROC + ; Load ytop_left = above[-1]; + sub r12, r2, #1 + ldrb r12, [r12] + vdup.u8 d0, r12 + + ; Load above 4 pixels + vld1.32 {d2[0]}, [r2] + + ; Compute above - ytop_left + vsubl.u8 q3, d2, d0 + + ; Load left row by row and compute left + (above - ytop_left) + ; 1st row and 2nd row + ldrb r12, [r3], #1 + ldrb r2, [r3], #1 + vdup.u16 q1, r12 + vdup.u16 q2, r2 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqshrun.s16 d0, q1, #0 + vqshrun.s16 d1, q2, #0 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + ; 3rd row and 4th row + ldrb r12, [r3], #1 + ldrb r2, [r3], #1 + vdup.u16 q1, r12 + vdup.u16 q2, r2 + vadd.s16 q1, q1, q3 + vadd.s16 q2, q2, q3 + vqshrun.s16 d0, q1, #0 + vqshrun.s16 d1, q2, #0 + vst1.32 {d0[0]}, [r0], r1 + vst1.32 {d1[0]}, [r0], r1 + + bx lr + ENDP ; |vp9_tm_predictor_4x4_neon| + END diff --git a/vp9/common/vp9_rtcd_defs.sh b/vp9/common/vp9_rtcd_defs.sh index cc571072f..a32f98aa5 100644 --- a/vp9/common/vp9_rtcd_defs.sh +++ b/vp9/common/vp9_rtcd_defs.sh @@ -57,7 +57,7 @@ prototype void vp9_v_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint specialize vp9_v_predictor_4x4 $sse_x86inc neon prototype void vp9_tm_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" -specialize vp9_tm_predictor_4x4 $sse_x86inc dspr2 +specialize vp9_tm_predictor_4x4 $sse_x86inc neon dspr2 prototype void vp9_dc_predictor_4x4 "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left" specialize vp9_dc_predictor_4x4 $sse_x86inc dspr2