From 0016dec27080e53c794d7f919bd6df6b890d0128 Mon Sep 17 00:00:00 2001
From: Janne Grunau
Date: Sat, 15 Mar 2014 19:21:12 +0100
Subject: [PATCH] arm: x264_plane_copy_deinterleave_rgb_neon

plane_copy_deinterleave_rgb_c: 31543
plane_copy_deinterleave_rgb_neon: 8312
---
 common/arm/mc-a.S | 52 +++++++++++++++++++++++++++++++++++++++++++++++
 common/arm/mc-c.c |  5 +++++
 2 files changed, 57 insertions(+)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index df9e2fb6..e9a5f863 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1517,3 +1517,55 @@ block:
 
     pop             {r4-r7, pc}
 .endfunc
+@ Split packed pixels of pw bytes each (3, anything else is handled as 4) from src into the three byte planes dsta/dstb/dstc, 8 pixels per step.
+function x264_plane_copy_deinterleave_rgb_neon
+    push            {r4-r8, r10, r11, lr}   @ 8 registers = 32 bytes, so the stack arguments start at sp+32
+    ldrd            r4,  r5,  [sp, #32]     @ r4 = dstc, r5 = i_dstc (argument order per the C prototype)
+    ldrd            r6,  r7,  [sp, #40]     @ r6 = src,  r7 = i_src
+    ldr             r8,  [sp, #48]          @ r8 = pw
+    ldrd            r10, r11, [sp, #52]     @ r10 = w,   r11 = h
+    add             lr,  r10, #7
+    subs            r8,  r8,  #3            @ Z is set when pw == 3; these flags are used by subne/subeq/bne below
+    bic             lr,  lr,  #7            @ lr = w rounded up to a multiple of 8 (bic leaves the flags alone)
+    sub             r7,  r7,  lr, lsl #1    @ r7 = i_src - 2*lr, completed by subne/subeq
+    sub             r1,  r1,  lr            @ each plane pointer advances lr bytes per row, so the
+    sub             r3,  r3,  lr            @ strides become "distance from row end to next row start"
+    sub             r5,  r5,  lr
+    subne           r7,  r7,  lr, lsl #1    @ pw != 3: r7 = i_src - 4*lr
+    subeq           r7,  r7,  lr            @ pw == 3: r7 = i_src - 3*lr
+    bne             block4                  @ pw != 3 takes the 4-bytes-per-pixel path
+block3:
+    vld3.8          {d0,d1,d2}, [r6]!       @ read 24 bytes: byte 0/1/2 of 8 pixels go to d0/d1/d2, r6 += 24
+    subs            lr,  lr,  #8            @ 8 pixels done
+    vst1.8          {d0}, [r0]!             @ 8 bytes to each plane, pointers post-incremented
+    vst1.8          {d1}, [r2]!
+    vst1.8          {d2}, [r4]!
+    bgt             block3                  @ body runs before the test, so at least 8 pixels are always processed
+@ Row done: lr bytes (w rounded up to 8) were stored per plane. NOTE(review): rows presumably have room for that padding - confirm with callers.
+    subs            r11, r11, #1            @ one row fewer; add/mov below do not touch the flags
+    add             r0,  r0,  r1
+    add             r2,  r2,  r3
+    add             r4,  r4,  r5
+    add             r6,  r6,  r7
+    mov             lr,  r10                @ reload unrounded w; the subs #8 / bgt loop still runs ceil(w/8) times
+    bgt             block3                  @ flags from subs r11: continue while rows remain
+
+    pop             {r4-r8, r10, r11, pc}   @ restore and return
+block4:
+    vld4.8          {d0,d1,d2,d3}, [r6]!    @ read 32 bytes: byte 0/1/2/3 of 8 pixels go to d0..d3, r6 += 32
+    subs            lr,  lr,  #8
+    vst1.8          {d0}, [r0]!
+    vst1.8          {d1}, [r2]!
+    vst1.8          {d2}, [r4]!             @ d3 (fourth byte of every pixel) is loaded but never stored
+    bgt             block4
+@ Row done: same pointer stepping as the 3-byte path.
+    subs            r11, r11, #1
+    add             r0,  r0,  r1
+    add             r2,  r2,  r3
+    add             r4,  r4,  r5
+    add             r6,  r6,  r7
+    mov             lr,  r10                @ reload unrounded w, see the 3-byte path
+    bgt             block4
+
+    pop             {r4-r8, r10, r11, pc}
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index e50d7364..48b868e4 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -50,6 +50,10 @@ void x264_pixel_avg2_w20_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, uint8_t
 void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
                                         pixel *dstv, intptr_t i_dstv,
                                         pixel *src, intptr_t i_src, int w, int h );
+void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
+                                            pixel *dstb, intptr_t i_dstb,
+                                            pixel *dstc, intptr_t i_dstc,
+                                            pixel *src, intptr_t i_src, int pw, int w, int h );
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 
@@ -233,6 +237,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
     pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
+    pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
 
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;
-- 
2.40.0