From 5e0ca9aa4eab5e2cb4b124774c3ecebbc6f1ae35 Mon Sep 17 00:00:00 2001
From: Janne Grunau
Date: Sat, 15 Mar 2014 18:22:49 +0100
Subject: [PATCH] arm: load_deinterleave_chroma_f{dec,enc}_neon

load_deinterleave_chroma_fdec_c: 4055
load_deinterleave_chroma_fdec_neon: 995
load_deinterleave_chroma_fenc_c: 4071
load_deinterleave_chroma_fenc_neon: 992
---
 common/arm/mc-a.S | 26 ++++++++++++++++++++++++++
 common/arm/mc-c.c |  6 ++++++
 2 files changed, 32 insertions(+)

diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 179315cf..df9e2fb6 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -1466,6 +1466,32 @@ lowres_xloop_end:
     pop             {r4-r10,pc}
 .endfunc
 
+function x264_load_deinterleave_chroma_fdec_neon /* Split rows of 16 interleaved bytes into two 8-byte halves of an FDEC_STRIDE-wide row. r0=dst, r1=src, r2=i_src, r3=height (argument order of the C prototype; presumably U/V chroma samples - confirm against the C reference) */
+    mov             ip, #FDEC_STRIDE/2          /* ip = half a destination row; post-increment used by both stores */
+1:                                              /* one source row per iteration */
+    vld2.8          {d0-d1}, [r1,:128], r2      /* load 16 bytes de-interleaved: even bytes -> d0, odd bytes -> d1; :128 asserts a 16-byte-aligned src; r1 += r2 */
+    subs            r3, r3, #1                  /* decrement the row counter and set the flags tested by bgt */
+    pld             [r1]                        /* prefetch the next source row (r1 has already been advanced) */
+    vst1.8          {d0}, [r0,:64], ip          /* store the 8 even bytes; :64 asserts an 8-byte-aligned dst; r0 += FDEC_STRIDE/2 */
+    vst1.8          {d1}, [r0,:64], ip          /* store the 8 odd bytes half a row later; r0 now points at the next row */
+    bgt             1b                          /* repeat while rows remain; the body always runs at least once, so height is assumed > 0 */
+
+    bx              lr
+.endfunc
+
+function x264_load_deinterleave_chroma_fenc_neon /* Same routine as the fdec variant, but the destination row is FENC_STRIDE wide. r0=dst, r1=src, r2=i_src, r3=height */
+    mov             ip, #FENC_STRIDE/2          /* ip = half a destination row; post-increment used by both stores */
+1:                                              /* one source row per iteration */
+    vld2.8          {d0-d1}, [r1,:128], r2      /* load 16 bytes de-interleaved: even bytes -> d0, odd bytes -> d1; :128 asserts a 16-byte-aligned src; r1 += r2 */
+    subs            r3, r3, #1                  /* decrement the row counter and set the flags tested by bgt */
+    pld             [r1]                        /* prefetch the next source row (r1 has already been advanced) */
+    vst1.8          {d0}, [r0,:64], ip          /* store the 8 even bytes; :64 asserts an 8-byte-aligned dst; r0 += FENC_STRIDE/2 */
+    vst1.8          {d1}, [r0,:64], ip          /* store the 8 odd bytes half a row later; r0 now points at the next row */
+    bgt             1b                          /* repeat while rows remain; the body always runs at least once, so height is assumed > 0 */
+
+    bx              lr
+.endfunc
+
 function x264_plane_copy_deinterleave_neon
     push            {r4-r7, lr}
     ldrd            r6, r7, [sp, #28]
diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c
index bf5e2eb1..e50d7364 100644
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -51,6 +51,9 @@ void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
                                         pixel *dstv, intptr_t i_dstv,
                                         pixel *src, intptr_t i_src, int w, int h );
 
+void x264_load_deinterleave_chroma_fdec_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); /* implemented in common/arm/mc-a.S; destination rows are FDEC_STRIDE apart */
+void x264_load_deinterleave_chroma_fenc_neon( pixel *dst, pixel *src, intptr_t i_src, int height ); /* implemented in common/arm/mc-a.S; destination rows are FENC_STRIDE apart */
+
 #define MC_WEIGHT(func)\
 void x264_mc_weight_w20##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
 void x264_mc_weight_w16##func##_neon( uint8_t *, intptr_t, uint8_t *, intptr_t, const x264_weight_t *, int );\
@@ -231,6 +234,9 @@ void x264_mc_init_arm( int cpu, x264_mc_functions_t *pf )
 
     pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
 
+    pf->load_deinterleave_chroma_fdec = x264_load_deinterleave_chroma_fdec_neon; /* point the fdec slot of the function table at the NEON routine */
+    pf->load_deinterleave_chroma_fenc = x264_load_deinterleave_chroma_fenc_neon; /* point the fenc slot of the function table at the NEON routine */
+
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_neon;
     pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_neon;
     pf->avg[PIXEL_8x16] = x264_pixel_avg_8x16_neon;
-- 
2.40.0