ret
endfunc
+function x264_plane_copy_swap_core_neon, export=1 // copies h rows from src to dst, swapping the two bytes of every 16-bit pair; per the C prototype: x0=dst, x1=i_dst, x2=src, x3=i_src, w4=w, w5=h
+ lsl w4, w4, #1 // w4 = 2*w = bytes handled per row; the 32-bit write also zeroes the upper half of x4
+ sub x1, x1, x4 // x1 = i_dst - 2*w: what must be added to dst after a row to reach the next row
+ sub x3, x3, x4 // x3 = i_src - 2*w: same adjustment for src
+1: // start of one row
+ mov w8, w4 // w8 = bytes left in the current row
+ tbz w4, #4, 32f // bit 4 (value 16) of the row length clear: no single 16-byte chunk, go to the 32-byte loop
+ subs w8, w8, #16 // account for the 16-byte chunk; Z is set when that chunk was the whole row
+ ld1 {v0.16b}, [x2], #16 // load 16 bytes, src += 16
+ rev16 v0.16b, v0.16b // reverse the bytes inside each halfword, i.e. swap each byte pair
+ st1 {v0.16b}, [x0], #16 // store 16 bytes, dst += 16
+ b.eq 0f // uses the flags of the subs above (ld1/rev16/st1 leave NZCV alone): row already complete
+32: // 32 bytes per iteration; the body runs once before the remaining count is tested
+ subs w8, w8, #32 // flags are consumed by the b.gt at the end of this loop
+ ld1 {v0.16b,v1.16b}, [x2], #32 // load 32 bytes, src += 32
+ rev16 v0.16b, v0.16b // swap byte pairs, first 16 bytes
+ rev16 v1.16b, v1.16b // swap byte pairs, second 16 bytes
+ st1 {v0.16b,v1.16b}, [x0], #32 // store 32 bytes, dst += 32
+ b.gt 32b // repeat while bytes remain; NOTE(review): if 2*w is not a multiple of 16 this loop runs past 2*w bytes and the row adjustment below no longer lands on the next row - presumably the caller guarantees the multiple, confirm
+0: // end of row
+ subs w5, w5, #1 // one row done; flags are consumed by the b.gt below
+ add x2, x2, x3 // src -> start of next row (add does not touch the flags)
+ add x0, x0, x1 // dst -> start of next row
+ b.gt 1b // more rows left; the row body has already run once before h is first tested
+
+ ret
+endfunc
+
function x264_plane_copy_deinterleave_neon, export=1
add w9, w6, #15
and w9, w9, #0xfffffff0
void x264_plane_copy_core_neon( pixel *dst, intptr_t i_dst,
pixel *src, intptr_t i_src, int w, int h );
+void x264_plane_copy_swap_core_neon( pixel *dst, intptr_t i_dst, // implemented in NEON assembly; i_dst and i_src are added to the pointers as byte offsets between rows
+ pixel *src, intptr_t i_src, int w, int h ); // the assembly processes 2*w bytes per row, swapping each pair of bytes, for h rows
void x264_plane_copy_deinterleave_neon( pixel *dstu, intptr_t i_dstu,
pixel *dstv, intptr_t i_dstv,
pixel *src, intptr_t i_src, int w, int h );
int height, int16_t *buf );
PLANE_COPY(16, neon)
+PLANE_COPY_SWAP(16, neon)
PLANE_INTERLEAVE(neon)
#endif // !HIGH_BIT_DEPTH
pf->copy[PIXEL_4x4] = x264_mc_copy_w4_neon;
pf->plane_copy = x264_plane_copy_neon;
+ pf->plane_copy_swap = x264_plane_copy_swap_neon;
pf->plane_copy_deinterleave = x264_plane_copy_deinterleave_neon;
pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_neon;
pf->plane_copy_interleave = x264_plane_copy_interleave_neon;