From: David Conrad Date: Mon, 9 Nov 2009 04:12:54 +0000 (-0800) Subject: Fix weightp on ARM + PPC X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=094110915e2de3410feca47463bae4a8b28f587e;p=libx264 Fix weightp on ARM + PPC No ARM or PPC assembly yet though. --- diff --git a/Makefile b/Makefile index 8ba8d815..2fe33616 100644 --- a/Makefile +++ b/Makefile @@ -64,11 +64,9 @@ endif # AltiVec optims ifeq ($(ARCH),PPC) -ALTIVECSRC += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \ - common/ppc/quant.c common/ppc/deblock.c \ - common/ppc/predict.c -SRCS += $(ALTIVECSRC) -$(ALTIVECSRC:%.c=%.o): CFLAGS += $(ALTIVECFLAGS) +SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \ + common/ppc/quant.c common/ppc/deblock.c \ + common/ppc/predict.c endif # NEON optims diff --git a/common/arm/mc-c.c b/common/arm/mc-c.c index c6aaeb0e..167b11bb 100644 --- a/common/arm/mc-c.c +++ b/common/arm/mc-c.c @@ -76,7 +76,7 @@ static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2}; static void mc_luma_neon( uint8_t *dst, int i_dst_stride, uint8_t *src[4], int i_src_stride, int mvx, int mvy, - int i_width, int i_height ) + int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -90,18 +90,19 @@ static void mc_luma_neon( uint8_t *dst, int i_dst_stride, x264_pixel_avg_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); + if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height ); } + else if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height ); else - { - x264_mc_copy_wtab_neon[i_width>>2]( - dst, i_dst_stride, src1, i_src_stride, i_height ); - } + x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height ); } static uint8_t *get_ref_neon( uint8_t *dst, int *i_dst_stride, uint8_t *src[4], int i_src_stride, int mvx, int mvy, - int i_width, int i_height ) + int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -115,6 +116,13 @@ static uint8_t *get_ref_neon( uint8_t *dst, int *i_dst_stride, x264_pixel_avg_wtab_neon[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); + if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height ); + return dst; + } + else if( weight->weightfn ) + { + weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height ); return dst; } else diff --git a/common/ppc/mc.c b/common/ppc/mc.c index c703d085..a588d8f2 100644 --- a/common/ppc/mc.c +++ b/common/ppc/mc.c @@ -181,7 +181,7 @@ static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, int i_dst, static void mc_luma_altivec( uint8_t *dst, int i_dst_stride, uint8_t *src[4], int i_src_stride, int mvx, int mvy, - int i_width, int i_height ) + int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -201,8 +201,11 @@ static void mc_luma_altivec( uint8_t *dst, int i_dst_stride, default: x264_pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height ); } - + if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height ); } + else if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height ); else { switch(i_width) { @@ -224,7 +227,7 @@ static void mc_luma_altivec( uint8_t *dst, int i_dst_stride, static uint8_t *get_ref_altivec( uint8_t *dst, int *i_dst_stride, uint8_t *src[4], int i_src_stride, int mvx, int mvy, - int i_width, int i_height ) + int i_width, int i_height, const x264_weight_t *weight ) { int qpel_idx = ((mvy&3)<<2) + (mvx&3); int offset = (mvy>>2)*i_src_stride + (mvx>>2); @@ -248,6 +251,13 @@ static uint8_t *get_ref_altivec( uint8_t *dst, int *i_dst_stride, x264_pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height ); break; } + if( weight->weightfn ) + weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height ); + return dst; + } + else if( weight->weightfn ) + { + weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height ); return dst; } else diff --git a/configure b/configure index d26ace16..7cb32c00 100755 --- a/configure +++ b/configure @@ -267,9 +267,9 @@ case $host_cpu in ARCH="PPC" if [ $SYS = MACOSX ] then - ALTIVECFLAGS="$ALTIVECFLAGS -faltivec -fastf -mcpu=G4" + CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4" else - ALTIVECFLAGS="$ALTIVECFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H" + CFLAGS="$CFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H" fi ;; sparc) @@ -467,7 +467,6 @@ ARCH=$ARCH SYS=$SYS CC=$CC CFLAGS=$CFLAGS -ALTIVECFLAGS=$ALTIVECFLAGS LDFLAGS=$LDFLAGS AR=$AR RANLIB=$RANLIB