From 315285741877f89c660b9cefc3114963e95cf56a Mon Sep 17 00:00:00 2001
From: Loren Merritt <pengvado@videolan.org>
Date: Sun, 2 Mar 2008 02:26:00 +0000
Subject: [PATCH] cosmetics in dsp init: fold per-ISA x86 init functions into one cpu-checked init per module

git-svn-id: svn://svn.videolan.org/x264/trunk@743 df754926-b1dd-0310-bc7b-ec298dee348c
---
 common/i386/mc-c.c      | 40 +++++++++++++-----------
 common/i386/mc.h        |  3 +-
 common/i386/predict-c.c | 68 +++++++++++++++++++++++------------------
 common/i386/predict.h   | 10 +++---
 common/mc.c             |  7 ++---
 common/predict.c        | 30 +++---------------
 6 files changed, 71 insertions(+), 87 deletions(-)

diff --git a/common/i386/mc-c.c b/common/i386/mc-c.c
index 8ac3bf66..212a8b22 100644
--- a/common/i386/mc-c.c
+++ b/common/i386/mc-c.c
@@ -84,10 +84,10 @@ static void (* const x264_mc_copy_wtab_mmx[5])( uint8_t *, int, uint8_t *, int,
 static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
 static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
 
-void mc_luma_mmx( uint8_t *dst,    int i_dst_stride,
-                  uint8_t *src[4], int i_src_stride,
-                  int mvx, int mvy,
-                  int i_width, int i_height )
+void mc_luma_mmxext( uint8_t *dst,    int i_dst_stride,
+                     uint8_t *src[4], int i_src_stride,
+                     int mvx, int mvy,
+                     int i_width, int i_height )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -107,10 +107,10 @@ void mc_luma_mmx( uint8_t *dst,    int i_dst_stride,
     }
 }
 
-uint8_t *get_ref_mmx( uint8_t *dst,   int *i_dst_stride,
-                      uint8_t *src[4], int i_src_stride,
-                      int mvx, int mvy,
-                      int i_width, int i_height )
+uint8_t *get_ref_mmxext( uint8_t *dst,   int *i_dst_stride,
+                         uint8_t *src[4], int i_src_stride,
+                         int mvx, int mvy,
+                         int i_width, int i_height )
 {
     int qpel_idx = ((mvy&3)<<2) + (mvx&3);
     int offset = (mvy>>2)*i_src_stride + (mvx>>2);
@@ -132,10 +132,20 @@ uint8_t *get_ref_mmx( uint8_t *dst,   int *i_dst_stride,
 }
 
 
-void x264_mc_mmxext_init( x264_mc_functions_t *pf )
+void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
 {
-    pf->mc_luma = mc_luma_mmx;
-    pf->get_ref = get_ref_mmx;
+    if( !(cpu&X264_CPU_MMX) )
+        return;
+
+    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
+    pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_mmx;
+    pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_mmx;
+
+    if( !(cpu&X264_CPU_MMXEXT) )
+        return;
+
+    pf->mc_luma = mc_luma_mmxext;
+    pf->get_ref = get_ref_mmxext;
 
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_mmxext;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_mmxext;
@@ -154,17 +164,11 @@ void x264_mc_mmxext_init( x264_mc_functions_t *pf )
     pf->avg_weight[PIXEL_4x4]   = x264_pixel_avg_weight_4x4_mmxext;
     // avg_weight_4x8 is rare and 4x2 is not used
 
-    pf->copy[PIXEL_16x16] = x264_mc_copy_w16_mmx;
-    pf->copy[PIXEL_8x8]   = x264_mc_copy_w8_mmx;
-    pf->copy[PIXEL_4x4]   = x264_mc_copy_w4_mmx;
-
     pf->plane_copy = x264_plane_copy_mmxext;
     pf->hpel_filter = x264_hpel_filter_mmxext;
 
     pf->prefetch_fenc = x264_prefetch_fenc_mmxext;
     pf->prefetch_ref  = x264_prefetch_ref_mmxext;
-}
-void x264_mc_sse2_init( x264_mc_functions_t *pf )
-{
+
     /* todo: use sse2 */
 }
diff --git a/common/i386/mc.h b/common/i386/mc.h
index bde31b0d..40f23596 100644
--- a/common/i386/mc.h
+++ b/common/i386/mc.h
@@ -24,8 +24,7 @@
 #ifndef _I386_MC_H
 #define _I386_MC_H 1
 
-void x264_mc_mmxext_init( x264_mc_functions_t *pf );
-void x264_mc_sse2_init( x264_mc_functions_t *pf );
+void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf );
 
 void x264_mc_chroma_mmxext( uint8_t *src, int i_src_stride,
                             uint8_t *dst, int i_dst_stride,
diff --git a/common/i386/predict-c.c b/common/i386/predict-c.c
index 956a4c17..63f48307 100644
--- a/common/i386/predict-c.c
+++ b/common/i386/predict-c.c
@@ -72,7 +72,7 @@ static void predict_16x16_p_##name( uint8_t *src )\
 PREDICT_16x16_P( mmxext )
 PREDICT_16x16_P( sse2   )
 
-static void predict_8x8c_p( uint8_t *src )
+static void predict_8x8c_p_mmxext( uint8_t *src )
 {
     int a, b, c, i;
     int H = 0;
@@ -109,7 +109,7 @@ static void predict_16x16_dc_##name( uint8_t *src )\
 PREDICT_16x16_DC( mmxext )
 PREDICT_16x16_DC( sse2   )
 
-static void predict_8x8c_dc( uint8_t *src )
+static void predict_8x8c_dc_mmxext( uint8_t *src )
 {
     int s2 = 4
        + src[-1 + 0*FDEC_STRIDE]
@@ -492,34 +492,48 @@ void x264_intra_sa8d_x3_8x8_mmxext( uint8_t *fenc, uint8_t edge[33], int res[3]
 /****************************************************************************
  * Exported functions:
  ****************************************************************************/
-void x264_predict_16x16_init_mmxext( x264_predict_t pf[7] )
+void x264_predict_16x16_init_mmx( int cpu, x264_predict_t pf[7] )
 {
-    pf[I_PRED_16x16_V]       = predict_16x16_v_mmx;
-    pf[I_PRED_16x16_DC]      = predict_16x16_dc_mmxext;
-    pf[I_PRED_16x16_DC_TOP]  = predict_16x16_dc_top_mmxext;
-    pf[I_PRED_16x16_P]       = predict_16x16_p_mmxext;
-
+    if( !(cpu&X264_CPU_MMX) )
+        return;
 #ifdef ARCH_X86_64
     pf[I_PRED_16x16_H]       = predict_16x16_h;
     pf[I_PRED_16x16_DC_LEFT] = predict_16x16_dc_left;
 #endif
+    pf[I_PRED_16x16_V]       = predict_16x16_v_mmx;
+    if( !(cpu&X264_CPU_MMXEXT) )
+        return;
+    pf[I_PRED_16x16_DC]      = predict_16x16_dc_mmxext;
+    pf[I_PRED_16x16_DC_TOP]  = predict_16x16_dc_top_mmxext;
+    pf[I_PRED_16x16_P]       = predict_16x16_p_mmxext;
+    if( !(cpu&X264_CPU_SSE2) || (cpu&X264_CPU_3DNOW) ) // skip SSE2 on AMD (3DNow!) cpus: slower there
+        return;
+    pf[I_PRED_16x16_DC]     = predict_16x16_dc_sse2;
+    pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2;
+    pf[I_PRED_16x16_V]      = predict_16x16_v_sse2;
+    pf[I_PRED_16x16_P]      = predict_16x16_p_sse2;
 }
 
-void x264_predict_8x8c_init_mmxext( x264_predict_t pf[7] )
+void x264_predict_8x8c_init_mmx( int cpu, x264_predict_t pf[7] )
 {
-    pf[I_PRED_CHROMA_V]       = predict_8x8c_v_mmx;
-    pf[I_PRED_CHROMA_P]       = predict_8x8c_p;
-    pf[I_PRED_CHROMA_DC]      = predict_8x8c_dc;
-
+    if( !(cpu&X264_CPU_MMX) )
+        return;
 #ifdef ARCH_X86_64
     pf[I_PRED_CHROMA_H]       = predict_8x8c_h;
     pf[I_PRED_CHROMA_DC_LEFT] = predict_8x8c_dc_left;
     pf[I_PRED_CHROMA_DC_TOP]  = predict_8x8c_dc_top;
 #endif
+    pf[I_PRED_CHROMA_V]       = predict_8x8c_v_mmx;
+    if( !(cpu&X264_CPU_MMXEXT) )
+        return;
+    pf[I_PRED_CHROMA_P]       = predict_8x8c_p_mmxext;
+    pf[I_PRED_CHROMA_DC]      = predict_8x8c_dc_mmxext;
 }
 
-void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] )
+void x264_predict_8x8_init_mmx( int cpu, x264_predict8x8_t pf[12] )
 {
+    if( !(cpu&X264_CPU_MMXEXT) )
+        return;
     pf[I_PRED_8x8_V]   = predict_8x8_v_mmxext;
     pf[I_PRED_8x8_DC]  = predict_8x8_dc_mmxext;
     pf[I_PRED_8x8_DC_TOP] = predict_8x8_dc_top_mmxext;
@@ -529,31 +543,25 @@ void x264_predict_8x8_init_mmxext( x264_predict8x8_t pf[12] )
 #ifdef ARCH_X86
     pf[I_PRED_8x8_DDR] = predict_8x8_ddr_mmxext;
 #endif
-}
-
-void x264_predict_8x8_init_sse2( x264_predict8x8_t pf[12] )
-{
+    if( !(cpu&X264_CPU_SSE2) || (cpu&X264_CPU_3DNOW) ) // keep skipping SSE2 on AMD (3DNow!) cpus: slower there
+        return;
     pf[I_PRED_8x8_DDL] = predict_8x8_ddl_sse2;
     pf[I_PRED_8x8_VL]  = predict_8x8_vl_sse2;
     pf[I_PRED_8x8_DDR] = predict_8x8_ddr_sse2;
 }
 
-void x264_predict_4x4_init_mmxext( x264_predict_t pf[12] )
+void x264_predict_4x4_init_mmx( int cpu, x264_predict_t pf[12] )
 {
-    pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
-    pf[I_PRED_4x4_VL]  = predict_4x4_vl_mmxext;
-#ifdef ARCH_X86_64 // slower on x86
+    if( !(cpu&X264_CPU_MMX) )
+        return;
+#ifdef ARCH_X86_64 // slower on x86
     pf[I_PRED_4x4_DDR] = predict_4x4_ddr;
     pf[I_PRED_4x4_VR]  = predict_4x4_vr;
     pf[I_PRED_4x4_HD]  = predict_4x4_hd;
     pf[I_PRED_4x4_HU]  = predict_4x4_hu;
 #endif
-}
-
-void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] )
-{
-    pf[I_PRED_16x16_DC]     = predict_16x16_dc_sse2;
-    pf[I_PRED_16x16_DC_TOP] = predict_16x16_dc_top_sse2;
-    pf[I_PRED_16x16_V]      = predict_16x16_v_sse2;
-    pf[I_PRED_16x16_P]      = predict_16x16_p_sse2;
+    if( !(cpu&X264_CPU_MMXEXT) )
+        return;
+    pf[I_PRED_4x4_DDL] = predict_4x4_ddl_mmxext;
+    pf[I_PRED_4x4_VL]  = predict_4x4_vl_mmxext;
 }
diff --git a/common/i386/predict.h b/common/i386/predict.h
index 49d892d0..4db2e91e 100644
--- a/common/i386/predict.h
+++ b/common/i386/predict.h
@@ -24,11 +24,9 @@
 #ifndef _I386_PREDICT_H
 #define _I386_PREDICT_H 1
 
-void x264_predict_16x16_init_mmxext ( x264_predict_t pf[7] );
-void x264_predict_8x8c_init_mmxext  ( x264_predict_t pf[7] );
-void x264_predict_4x4_init_mmxext   ( x264_predict_t pf[12] );
-void x264_predict_8x8_init_mmxext   ( x264_predict8x8_t pf[12] );
-void x264_predict_8x8_init_sse2     ( x264_predict8x8_t pf[12] );
-void x264_predict_16x16_init_sse2 ( x264_predict_t pf[7] );
+void x264_predict_16x16_init_mmx ( int cpu, x264_predict_t pf[7] );
+void x264_predict_8x8c_init_mmx  ( int cpu, x264_predict_t pf[7] );
+void x264_predict_4x4_init_mmx   ( int cpu, x264_predict_t pf[12] );
+void x264_predict_8x8_init_mmx   ( int cpu, x264_predict8x8_t pf[12] );
 
 #endif
diff --git a/common/mc.c b/common/mc.c
index 6ce44e34..cc7032f0 100644
--- a/common/mc.c
+++ b/common/mc.c
@@ -375,12 +375,9 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf )
     pf->prefetch_ref  = prefetch_ref_null;
 
 #ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT ) {
-        x264_mc_mmxext_init( pf );
+    x264_mc_init_mmx( cpu, pf );
+    if( cpu&X264_CPU_MMXEXT )
         pf->mc_chroma = x264_mc_chroma_mmxext;
-    }
-    if( cpu&X264_CPU_SSE2 )
-        x264_mc_sse2_init( pf );
 #endif
 #ifdef ARCH_PPC
     if( cpu&X264_CPU_ALTIVEC )
diff --git a/common/predict.c b/common/predict.c
index 3b44723f..57f96215 100644
--- a/common/predict.c
+++ b/common/predict.c
@@ -847,15 +847,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] )
     pf[I_PRED_16x16_DC_128 ]= predict_16x16_dc_128;
 
 #ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT )
-    {
-        x264_predict_16x16_init_mmxext( pf );
-    }
-    // disable on AMD processors since it is slower
-    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
-    {
-        x264_predict_16x16_init_sse2( pf );
-    }
+    x264_predict_16x16_init_mmx( cpu, pf );
 #endif
 
 #ifdef ARCH_PPC
@@ -877,10 +869,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] )
     pf[I_PRED_CHROMA_DC_128 ]= predict_8x8c_dc_128;
 
 #ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT )
-    {
-        x264_predict_8x8c_init_mmxext( pf );
-    }
+    x264_predict_8x8c_init_mmx( cpu, pf );
 #endif
 }
 
@@ -900,15 +889,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12] )
     pf[I_PRED_8x8_DC_128] = predict_8x8_dc_128;
 
 #ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT )
-    {
-        x264_predict_8x8_init_mmxext( pf );
-    }
-    // disable on AMD processors since it is slower
-    if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_3DNOW) )
-    {
-        x264_predict_8x8_init_sse2( pf );
-    }
+    x264_predict_8x8_init_mmx( cpu, pf );
 #endif
 }
 
@@ -928,10 +909,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] )
     pf[I_PRED_4x4_DC_128] = predict_4x4_dc_128;
 
 #ifdef HAVE_MMX
-    if( cpu&X264_CPU_MMXEXT )
-    {
-        x264_predict_4x4_init_mmxext( pf );
-    }
+    x264_predict_4x4_init_mmx( cpu, pf );
 #endif
 }
 
-- 
2.40.0