From 6e74eb5af2f28ab30d2c28a86f921b56e94f04f7 Mon Sep 17 00:00:00 2001 From: Luca Barbato Date: Sun, 19 Aug 2018 09:28:40 +0200 Subject: [PATCH] ppc: Rework the adds in satd8x8 10% faster. --- common/ppc/pixel.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/common/ppc/pixel.c b/common/ppc/pixel.c index 5e1a1cec..77a0cf75 100644 --- a/common/ppc/pixel.c +++ b/common/ppc/pixel.c @@ -292,14 +292,29 @@ static int pixel_satd_8x8_altivec( uint8_t *pix1, intptr_t i_pix1, VEC_HADAMAR( diff4v, diff5v, diff6v, diff7v, temp4v, temp5v, temp6v, temp7v ); - VEC_ADD_ABS( temp0v, zero_s32v, satdv ); - VEC_ADD_ABS( temp1v, satdv, satdv ); - VEC_ADD_ABS( temp2v, satdv, satdv ); - VEC_ADD_ABS( temp3v, satdv, satdv ); - VEC_ADD_ABS( temp4v, satdv, satdv ); - VEC_ADD_ABS( temp5v, satdv, satdv ); - VEC_ADD_ABS( temp6v, satdv, satdv ); - VEC_ADD_ABS( temp7v, satdv, satdv ); + vec_s16_t t0 = vec_abs( temp0v ); + vec_s16_t t1 = vec_abs( temp1v ); + vec_s16_t t2 = vec_abs( temp2v ); + vec_s16_t t3 = vec_abs( temp3v ); + + vec_s16_t s0 = vec_adds( t0, t1 ); + vec_s16_t s1 = vec_adds( t2, t3 ); + + vec_s32_t s01 = vec_sum4s( s0, zero_s32v ); + vec_s32_t s23 = vec_sum4s( s1, zero_s32v ); + + vec_s16_t t4 = vec_abs( temp4v ); + vec_s16_t t5 = vec_abs( temp5v ); + vec_s16_t t6 = vec_abs( temp6v ); + vec_s16_t t7 = vec_abs( temp7v ); + + vec_s16_t s2 = vec_adds( t4, t5 ); + vec_s16_t s3 = vec_adds( t6, t7 ); + + vec_s32_t s0145 = vec_sum4s( s2, s01 ); + vec_s32_t s2367 = vec_sum4s( s3, s23 ); + + satdv = vec_add( s0145, s2367 ); satdv = vec_sums( satdv, zero_s32v ); satdv = vec_splat( satdv, 3 ); -- 2.40.0