From: Loren Merritt
Date: Thu, 24 Feb 2005 13:09:55 +0000 (+0000)
Subject: Altivec functions for MC using the cached halfpel planes.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=067f22c153eaf19e1ba5ec35deef96a8fb3eae4e;p=libx264

Altivec functions for MC using the cached halfpel planes.
Patch by Fredrik Pettersson.

git-svn-id: svn://svn.videolan.org/x264/trunk@142 df754926-b1dd-0310-bc7b-ec298dee348c
---

diff --git a/common/mc.c b/common/mc.c
index 7e4e3ea0..ea085daa 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -393,12 +393,10 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
     if( cpu&X264_CPU_SSE2 )
         x264_mc_sse2_init( pf );
 #endif
-/*
 #ifdef ARCH_PPC
     if( cpu&X264_CPU_ALTIVEC )
         x264_mc_altivec_init( pf );
 #endif
-*/
 }
 
 void get_funcs_mmx(pf_mc_t*, pf_mc_t*, pf_mc_t*);
diff --git a/common/ppc/mc.c b/common/ppc/mc.c
index 901d285e..14ea74be 100644
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -680,7 +680,115 @@ static void motion_compensation_luma( uint8_t *src, int i_src,
     }
 }
 
-void x264_mc_altivec_init( x264_mc_function_t pf[2] )
+void mc_luma_altivec( uint8_t *src[4], int i_src_stride,
+                      uint8_t *dst, int i_dst_stride,
+                      int mvx, int mvy,
+                      int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+
+        switch(i_width) {
+        case 4:
+            pixel_avg_w4( dst, i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 8:
+            pixel_avg_w8( dst, i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 16:
+        default:
+            pixel_avg_w16( dst, i_dst_stride, src1, i_src_stride,
+                           src2, i_src_stride, i_height );
+        }
+
+    }
+    else
+    {
+        switch(i_width) {
+        case 4:
+            mc_copy_w4( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 8:
+            mc_copy_w8( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        case 16:
+            mc_copy_w16( src1, i_src_stride, dst, i_dst_stride, i_height );
+            break;
+        }
+
+    }
+}
+
+uint8_t *get_ref_altivec( uint8_t *src[4], int i_src_stride,
+                          uint8_t *dst, int * i_dst_stride,
+                          int mvx, int mvy,
+                          int i_width, int i_height )
+{
+    uint8_t *src1, *src2;
+
+    /* todo : fixme... */
+    int correction = ((mvx&3) == 3 && (mvy&3) == 1 || (mvx&3) == 1 && (mvy&3) == 3) ? 1:0;
+
+    int hpel1x = mvx>>1;
+    int hpel1y = (mvy+1-correction)>>1;
+    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );
+
+
+    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);
+
+    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
+    {
+        int hpel2x = (mvx+1)>>1;
+        int hpel2y = (mvy+correction)>>1;
+        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );
+
+        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);
+
+        switch(i_width) {
+        case 4:
+            pixel_avg_w4( dst, *i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 8:
+            pixel_avg_w8( dst, *i_dst_stride, src1, i_src_stride,
+                          src2, i_src_stride, i_height );
+            break;
+        case 16:
+        default:
+            pixel_avg_w16( dst, *i_dst_stride, src1, i_src_stride,
+                           src2, i_src_stride, i_height );
+        }
+        return dst;
+
+    }
+    else
+    {
+        *i_dst_stride = i_src_stride;
+        return src1;
+    }
+}
+
+void x264_mc_altivec_init( x264_mc_functions_t *pf )
 {
-    pf[MC_LUMA] = motion_compensation_luma;
+    pf->mc_luma = mc_luma_altivec;
+    pf->get_ref = get_ref_altivec;
 }
diff --git a/common/ppc/mc.h b/common/ppc/mc.h
index cf006f2b..e3779217 100644
--- a/common/ppc/mc.h
+++ b/common/ppc/mc.h
@@ -24,6 +24,6 @@
 #ifndef _PPC_MC_H
 #define _PPC_MC_H 1
 
-void x264_mc_altivec_init( x264_mc_function_t pf[2] );
+void x264_mc_altivec_init( x264_mc_functions_t *pf );
 
 #endif