granicus.if.org Git - libx264/commitdiff
Fix weightp on ARM + PPC
author David Conrad <lessen42@gmail.com>
Mon, 9 Nov 2009 04:12:54 +0000 (20:12 -0800)
committer Fiona Glaser <fiona@x264.com>
Mon, 9 Nov 2009 04:21:52 +0000 (20:21 -0800)
No ARM or PPC assembly yet though.

Makefile
common/arm/mc-c.c
common/ppc/mc.c
configure

index 8ba8d81519badda046e9f379e569a8f730366502..2fe33616ff987dc9098b025943153dc704f5cfec 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -64,11 +64,9 @@ endif
 
 # AltiVec optims
 ifeq ($(ARCH),PPC)
-ALTIVECSRC += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \
-              common/ppc/quant.c common/ppc/deblock.c \
-              common/ppc/predict.c
-SRCS += $(ALTIVECSRC)
-$(ALTIVECSRC:%.c=%.o): CFLAGS += $(ALTIVECFLAGS)
+SRCS += common/ppc/mc.c common/ppc/pixel.c common/ppc/dct.c \
+        common/ppc/quant.c common/ppc/deblock.c \
+        common/ppc/predict.c
 endif
 
 # NEON optims
index c6aaeb0e623a0eba7339bc6cd41842b96ac7ee69..167b11bb48a5db07b2fc89f35753075f24a7fa3e 100644 (file)
--- a/common/arm/mc-c.c
+++ b/common/arm/mc-c.c
@@ -76,7 +76,7 @@ static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
 static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
                           uint8_t *src[4], int i_src_stride,
                           int mvx, int mvy,
-                          int i_width, int i_height )
+                          int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -90,18 +90,19 @@ static void mc_luma_neon( uint8_t *dst,    int i_dst_stride,
         x264_pixel_avg_wtab_neon[i_width>>2](
                 dst, i_dst_stride, src1, i_src_stride,
                 src2, i_height );
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
     }
+    else if( weight->weightfn )
+        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
-    {
-        x264_mc_copy_wtab_neon[i_width>>2](
-                dst, i_dst_stride, src1, i_src_stride, i_height );
-    }
+        x264_mc_copy_wtab_neon[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, i_height );
 }
 
 static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
                               uint8_t *src[4], int i_src_stride,
                               int mvx, int mvy,
-                              int i_width, int i_height )
+                              int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -115,6 +116,13 @@ static uint8_t *get_ref_neon( uint8_t *dst,   int *i_dst_stride,
         x264_pixel_avg_wtab_neon[i_width>>2](
                 dst, *i_dst_stride, src1, i_src_stride,
                 src2, i_height );
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
+        return dst;
+    }
+    else if( weight->weightfn )
+    {
+        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
         return dst;
     }
     else
index c703d085bc66be163531cf4c7624174a38bc6a27..a588d8f2ec63bf505c400ffab42ebfde633683ac 100644 (file)
--- a/common/ppc/mc.c
+++ b/common/ppc/mc.c
@@ -181,7 +181,7 @@ static void x264_mc_copy_w16_aligned_altivec( uint8_t *dst, int i_dst,
 static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
                              uint8_t *src[4], int i_src_stride,
                              int mvx, int mvy,
-                             int i_width, int i_height )
+                             int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -201,8 +201,11 @@ static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
         default:
             x264_pixel_avg2_w16_altivec( dst, i_dst_stride, src1, i_src_stride, src2, i_height );
         }
-
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, i_dst_stride, dst, i_dst_stride, weight, i_height );
     }
+    else if( weight->weightfn )
+        weight->weightfn[i_width>>2]( dst, i_dst_stride, src1, i_src_stride, weight, i_height );
     else
     {
         switch(i_width) {
@@ -224,7 +227,7 @@ static void mc_luma_altivec( uint8_t *dst,    int i_dst_stride,
 static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
                                  uint8_t *src[4], int i_src_stride,
                                  int mvx, int mvy,
-                                 int i_width, int i_height )
+                                 int i_width, int i_height, const x264_weight_t *weight )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -248,6 +251,13 @@ static uint8_t *get_ref_altivec( uint8_t *dst,   int *i_dst_stride,
             x264_pixel_avg2_w20_altivec( dst, *i_dst_stride, src1, i_src_stride, src2, i_height );
             break;
         }
+        if( weight->weightfn )
+            weight->weightfn[i_width>>2]( dst, *i_dst_stride, dst, *i_dst_stride, weight, i_height );
+        return dst;
+    }
+    else if( weight->weightfn )
+    {
+        weight->weightfn[i_width>>2]( dst, *i_dst_stride, src1, i_src_stride, weight, i_height );
         return dst;
     }
     else
index d26ace16cc036033cf36056298c91016e3b7e8c8..7cb32c00cff516b5ada3808d97ca91726b7a3c5c 100755 (executable)
--- a/configure
+++ b/configure
@@ -267,9 +267,9 @@ case $host_cpu in
     ARCH="PPC"
     if [ $SYS = MACOSX ]
     then
-      ALTIVECFLAGS="$ALTIVECFLAGS -faltivec -fastf -mcpu=G4"
+      CFLAGS="$CFLAGS -faltivec -fastf -mcpu=G4"
     else
-      ALTIVECFLAGS="$ALTIVECFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H"
+      CFLAGS="$CFLAGS -maltivec -mabi=altivec -DHAVE_ALTIVEC_H"
     fi
     ;;
   sparc)
@@ -467,7 +467,6 @@ ARCH=$ARCH
 SYS=$SYS
 CC=$CC
 CFLAGS=$CFLAGS
-ALTIVECFLAGS=$ALTIVECFLAGS
 LDFLAGS=$LDFLAGS
 AR=$AR
 RANLIB=$RANLIB