From 3a61047871d39ddaecfb58f78ce5235ca9786a2d Mon Sep 17 00:00:00 2001 From: David Wolstencroft Date: Sat, 20 Jun 2009 21:42:55 +0200 Subject: [PATCH] AltiVec version of frame_init_lowres_core. 22.4x faster than C on PPC7450 and 25x on PPC970MP. --- common/ppc/mc.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/common/ppc/mc.c b/common/ppc/mc.c index fe5f7b8f..56ec9c1a 100644 --- a/common/ppc/mc.c +++ b/common/ppc/mc.c @@ -709,6 +709,99 @@ void x264_hpel_filter_altivec( uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint } } +static void frame_init_lowres_core_altivec( uint8_t *src0, uint8_t *dst0, uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, + int src_stride, int dst_stride, int width, int height ) +{ + int w = width/16; + int end = (width & 15); + int x, y; + vec_u8_t src0v, src1v, src2v; + vec_u8_t lv, hv, src1p1v; + vec_u8_t avg0v, avg1v, avghv, avghp1v, avgleftv, avgrightv; + static const vec_u8_t inverse_bridge_shuffle = CV(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1A, 0x1C, 0x1E ); + + for( y=0; ymc_luma = mc_luma_altivec; @@ -719,4 +812,5 @@ void x264_mc_altivec_init( x264_mc_functions_t *pf ) pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_altivec; pf->hpel_filter = x264_hpel_filter_altivec; + pf->frame_init_lowres_core = frame_init_lowres_core_altivec; } -- 2.40.0