}
}
+#if HAVE_VSX
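+/* Split a packed 3- or 4-byte-per-pixel source (pw == 3 or pw == 4) into the
+ * three separate planes dsta/dstb/dstc, 16 pixels per iteration, using VSX
+ * unaligned loads and vec_perm byte shuffles. */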
+void x264_plane_copy_deinterleave_rgb_altivec( uint8_t *dsta, intptr_t i_dsta,
+ uint8_t *dstb, intptr_t i_dstb,
+ uint8_t *dstc, intptr_t i_dstc,
+ uint8_t *src, intptr_t i_src,
+ int pw, int w, int h )
+{
+ if( pw == 3 )
+ {
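+ /* pw == 3: each iteration reads 48 source bytes (16 packed pixels).
+ * mask[0] gathers components a and b of pixels 0-7, mask[1] the same for
+ * pixels 8-15; mask[2] gathers c0-c9 (the 0xFF entries are placeholders)
+ * and mask[3] keeps them while pulling c10-c15 from the third load. */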
+ const vec_u8_t mask[4] = {
+ { 0x00, 0x03, 0x06, 0x09, 0x0C, 0x0F, 0x12, 0x15, 0x01, 0x04, 0x07, 0x0A, 0x0D, 0x10, 0x13, 0x16 },
+ { 0x08, 0x0B, 0x0E, 0x11, 0x14, 0x17, 0x1A, 0x1D, 0x09, 0x0C, 0x0F, 0x12, 0x15, 0x18, 0x1B, 0x1E },
+ { 0x02, 0x05, 0x08, 0x0B, 0x0E, 0x11, 0x14, 0x17, 0x1A, 0x1D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10, 0x13, 0x16, 0x19, 0x1C, 0x1F }
+ };
+
+ for( int y = 0; y < h; y++, dsta += i_dsta, dstb += i_dstb, dstc += i_dstc, src += i_src )
+ {
+ for( int x = 0; x < w; x += 16 )
+ {
+ vec_u8_t srcv1 = vec_vsx_ld( 3 * x, src );
+ vec_u8_t srcv2 = vec_vsx_ld( 3 * x + 16, src );
+ vec_u8_t srcv3 = vec_vsx_ld( 3 * x + 32, src );
+ vec_u64_t tmp1 = (vec_u64_t)vec_perm( srcv1, srcv2, mask[0] ); // a0 a1 a2 a3 a4 a5 a6 a7 b0 b1 b2 b3 b4 b5 b6 b7
+ vec_u64_t tmp2 = (vec_u64_t)vec_perm( srcv2, srcv3, mask[1] ); // a8 a9 a10 a11 a12 a13 a14 a15 b8 b9 b10 b11 b12 b13 b14 b15
+ vec_st( (vec_u8_t)vec_mergeh( tmp1, tmp2 ), x, dsta ); // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
+ vec_st( (vec_u8_t)vec_mergel( tmp1, tmp2 ), x, dstb ); // b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15
+
+ srcv1 = vec_perm( srcv1, srcv2, mask[2] ); // c0 c1 c2 c3 c4 c5 c6 c7 c8 c9
+ srcv1 = vec_perm( srcv1, srcv3, mask[3] ); // c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15
+ vec_st( srcv1, x, dstc );
+ }
+ }
+ }
+ else
+ {
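+ /* pw == 4: each iteration reads 64 source bytes (16 packed pixels).
+ * mask[0] gathers components a and b of 8 pixels, mask[1] gathers
+ * components c and d; the fourth component d is never stored. */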
+ const vec_u8_t mask[2] = {
+ { 0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C, 0x01, 0x05, 0x09, 0x0D, 0x11, 0x15, 0x19, 0x1D },
+ { 0x02, 0x06, 0x0A, 0x0E, 0x12, 0x16, 0x1A, 0x1E, 0x03, 0x07, 0x0B, 0x0F, 0x13, 0x17, 0x1B, 0x1F }
+ };
+
+ for( int y = 0; y < h; y++, dsta += i_dsta, dstb += i_dstb, dstc += i_dstc, src += i_src )
+ {
+ for( int x = 0; x < w; x += 16 )
+ {
+ vec_u8_t srcv1 = vec_vsx_ld( 4 * x, src );
+ vec_u8_t srcv2 = vec_vsx_ld( 4 * x + 16, src );
+ vec_u8_t srcv3 = vec_vsx_ld( 4 * x + 32, src );
+ vec_u8_t srcv4 = vec_vsx_ld( 4 * x + 48, src );
+
+ vec_u64_t tmp1 = (vec_u64_t)vec_perm( srcv1, srcv2, mask[0] ); // a0 a1 a2 a3 a4 a5 a6 a7 b0 b1 b2 b3 b4 b5 b6 b7
+ vec_u64_t tmp2 = (vec_u64_t)vec_perm( srcv3, srcv4, mask[0] ); // a8 a9 a10 a11 a12 a13 a14 a15 b8 b9 b10 b11 b12 b13 b14 b15
+ vec_st( (vec_u8_t)vec_mergeh( tmp1, tmp2 ), x, dsta ); // a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 a12 a13 a14 a15
+ vec_st( (vec_u8_t)vec_mergel( tmp1, tmp2 ), x, dstb ); // b0 b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b12 b13 b14 b15
+
+ tmp1 = (vec_u64_t)vec_perm( srcv1, srcv2, mask[1] ); // c0 c1 c2 c3 c4 c5 c6 c7 d0 d1 d2 d3 d4 d5 d6 d7
+ tmp2 = (vec_u64_t)vec_perm( srcv3, srcv4, mask[1] ); // c8 c9 c10 c11 c12 c13 c14 c15 d8 d9 d10 d11 d12 d13 d14 d15
+ vec_st( (vec_u8_t)vec_mergeh( tmp1, tmp2 ), x, dstc ); // c0 c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15
+ }
+ }
+ }
+}
+#endif
+
static void mc_luma_altivec( uint8_t *dst, intptr_t i_dst_stride,
uint8_t *src[4], intptr_t i_src_stride,
int mvx, int mvy,
pf->plane_copy_swap = x264_plane_copy_swap_altivec;
pf->plane_copy_interleave = x264_plane_copy_interleave_altivec;
pf->store_interleave_chroma = x264_store_interleave_chroma_altivec;
+#if HAVE_VSX
+ pf->plane_copy_deinterleave_rgb = x264_plane_copy_deinterleave_rgb_altivec;
+#endif // HAVE_VSX
#endif // !HIGH_BIT_DEPTH
}