From: Janne Grunau
Date: Sat, 15 Mar 2014 18:55:50 +0000 (+0100)
Subject: arm: x264_plane_copy_interleave_neon
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1576e51e52148ad1e1d8b5e76562f9eae8d47e6e;p=libx264

arm: x264_plane_copy_interleave_neon

plane_copy_interleave_c: 40285
plane_copy_interleave_neon: 10137
---

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index e9a5f863..6274c594 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1569,3 +1569,30 @@ block4:
 
     pop {r4-r8, r10, r11, pc}
 .endfunc
+
+function x264_plane_copy_interleave_neon    // byte-interleaves [r2] and [r4] into [r0], 16 bytes of each per step; NOTE(review): r0-r3 = dst, i_dst, srcu, i_srcu assumes the standard ARM calling convention applied to the C prototype - confirm
+    push {r4-r7, lr}                        // save r4-r7 and lr (5 words = 20 bytes), so caller stack words now start at sp+20
+    ldrd r6, r7, [sp, #28]                  // r6, r7 = words at sp+28 and sp+32; presumably w and h (3rd/4th stack arguments) - confirm
+    ldrd r4, r5, [sp, #20]                  // r4, r5 = words at sp+20 and sp+24; presumably srcv and i_srcv (1st/2nd stack arguments) - confirm
+    add lr, r6, #15
+    bic lr, lr, #15                         // lr = r6 rounded up to a multiple of 16 = bytes consumed from each source per row
+    sub r1, r1, lr, lsl #1                  // r1 -= 2*lr: [r0] advances 32 bytes for every 16 read from each source
+    sub r3, r3, lr                          // r3 -= lr: what is left to add to r2 after a row to apply the original r3 step
+    sub r5, r5, lr                          // r5 -= lr: same adjustment for r4
+blocki:
+    vld1.8 {q0}, [r2]!                      // load 16 bytes from [r2] into q0, post-increment r2
+    vld1.8 {q1}, [r4]!                      // load 16 bytes from [r4] into q1, post-increment r4
+    subs lr, lr, #16                        // 16 fewer bytes left in this row; sets the flags tested by the bgt below
+    vst2.8 {d0,d2}, [r0]!                   // store low halves of q0 and q1 byte-interleaved (16 bytes), post-increment r0
+    vst2.8 {d1,d3}, [r0]!                   // store high halves of q0 and q1 the same way
+    bgt blocki                              // keep going while the row counter is still > 0
+
+    subs r7, r7, #1                         // one row finished; these flags are the ones tested by the final bgt
+    add r0, r0, r1                          // move each pointer to the start of its next row
+    add r2, r2, r3
+    add r4, r4, r5
+    mov lr, r6                              // reload the row counter with the unrounded r6; counting down by 16 until <= 0 gives the same number of iterations as the rounded value
+    bgt blocki                              // next row while r7 > 0 (add/mov above do not modify the flags)
+
+    pop {r4-r7, pc}                         // restore saved registers and return by popping the saved lr into pc
+.endfunc
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index 48b868e4..e134e967 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -54,6 +54,9 @@ void x264_plane_copy_deinterleave_rgb_neon( pixel *dsta, intptr_t i_dsta,
                                             pixel *dstb, intptr_t i_dstb,
                                             pixel *dstc, intptr_t i_dstc,
                                             pixel *src, intptr_t i_src, int pw, int w, int h );
+void x264_plane_copy_interleave_neon( pixel *dst, intptr_t i_dst, // assembly routine added to common/arm/mc-a.S by this patch
+                                      pixel *srcu, intptr_t i_srcu,
+                                      pixel *srcv, intptr_t i_srcv, int w, int h );
 
 void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
 void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height );
@@ -238,6 +241,7 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
     pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
+    pf->plane_copy_interleave = x264_plane_copy_interleave_neon; // point the plane_copy_interleave hook at the NEON routine
 
     pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon;
     pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon;