cpu |= X264_CPU_MMXEXT|X264_CPU_SSE;
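/* CPUID leaf 1 feature bits tested here: edx bit 26 (0x04000000) = SSE2,
 * ecx bit 0 (0x00000001) = SSE3, ecx bit 9 (0x00000200) = SSSE3. */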
if( edx&0x04000000 )
cpu |= X264_CPU_SSE2;
-#ifdef HAVE_SSE3
if( ecx&0x00000001 )
cpu |= X264_CPU_SSE3;
if( ecx&0x00000200 )
cpu |= X264_CPU_SSSE3;
-#endif
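/* leaf 0x80000000 returns the highest supported extended CPUID leaf in eax */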
x264_cpu_cpuid( 0x80000000, &eax, &ebx, &ecx, &edx );
max_extended_cap = eax;
pf->scan_8x8 = zigzag_scan_8x8_frame;
pf->scan_4x4 = zigzag_scan_4x4_frame;
pf->sub_4x4 = zigzag_sub_4x4_frame;
-#ifdef HAVE_SSE3
+#ifdef HAVE_MMX
if( cpu&X264_CPU_SSSE3 )
pf->sub_4x4 = x264_zigzag_sub_4x4_frame_ssse3;
#endif
#ifdef HAVE_MMX
SATD_X_DECL7( _mmxext )
SATD_X_DECL5( _sse2 )
-#ifdef HAVE_SSE3
SATD_X_DECL7( _ssse3 )
#endif
-#endif
/****************************************************************************
* structural similarity metric
#endif
}
-#ifdef HAVE_SSE3
if( (cpu&X264_CPU_SSE3) && (cpu&X264_CPU_CACHELINE_SPLIT) )
{
INIT2( sad, _sse3 );
INIT2( sad_x4, _cache64_ssse3 );
}
}
-#endif //HAVE_SSE3
#endif //HAVE_MMX
#ifdef ARCH_PPC
pf->dequant_8x8 = x264_dequant_8x8_flat16_sse2;
}
}
-#endif
-#ifdef HAVE_SSE3
if( cpu&X264_CPU_SSSE3 )
{
pf->quant_4x4_dc = x264_quant_4x4_dc_ssse3;
pf->quant_4x4 = x264_quant_4x4_ssse3;
pf->quant_8x8 = x264_quant_8x8_ssse3;
}
-#endif
+#endif // HAVE_MMX
#ifdef ARCH_PPC
if( cpu&X264_CPU_ALTIVEC ) {
mov [r0+12], r2d
RET
-%ifdef HAVE_SSE3
;-----------------------------------------------------------------------------
; void x264_zigzag_sub_4x4_frame_ssse3( int16_t level[16], const uint8_t *src, uint8_t *dst )
;-----------------------------------------------------------------------------
movdqa [r0], xmm0
movdqa [r0+16], xmm1
RET
-%endif
%endmacro
PIXEL_AVG_SSE sse2
-%ifdef HAVE_SSE3
%define movdqu lddqu
PIXEL_AVG_SSE sse3
%undef movdqu
-%endif
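; (the sse3 flavour above is simply the sse2 macro body reassembled with every
;  movdqu replaced by lddqu, via the %define/%undef pair around it)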
; Cacheline split code for processors with high latencies for loads
; split over cache lines. See sad-a.asm for a more detailed explanation.
COPY_W16_SSE2 x264_mc_copy_w16_sse2, movdqu
; cacheline split with mmx has too much overhead; the speed benefit is near-zero.
; but with SSE3 the overhead is zero, so there's no reason not to include it.
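; (lddqu is sse3's unaligned load: on the P4 it fetches a wider aligned block and
;  extracts the 16 bytes, so a load that straddles a cacheline costs no extra
;  penalty, whereas faking this with mmx needs extra shift/merge work per load)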
-%ifdef HAVE_SSE3
COPY_W16_SSE2 x264_mc_copy_w16_sse3, lddqu
-%endif
COPY_W16_SSE2 x264_mc_copy_w16_aligned_sse2, movdqa
%define PALIGNR PALIGNR_SSE2
HPEL_V sse2
HPEL_C sse2
-%ifdef HAVE_SSE3
%define PALIGNR PALIGNR_SSSE3
HPEL_C ssse3
-%endif
cglobal x264_sfence
sfence
PIXEL_AVG_WTAB(cache64_mmxext, mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext, cache64_mmxext)
PIXEL_AVG_WTAB(sse2, mmxext, mmxext, mmxext, sse2, sse2)
PIXEL_AVG_WTAB(cache64_sse2, mmxext, cache64_mmxext, cache64_sse2, cache64_sse2, cache64_sse2)
-#ifdef HAVE_SSE3
PIXEL_AVG_WTAB(cache64_sse3, mmxext, cache64_mmxext, sse3, sse3, sse3)
-#endif
#define MC_COPY_WTAB(instr, name1, name2, name3)\
static void (* const x264_mc_copy_wtab_##instr[5])( uint8_t *, int, uint8_t *, int, int ) =\
MC_COPY_WTAB(mmx,mmx,mmx,mmx)
MC_COPY_WTAB(sse2,mmx,mmx,sse2)
-#ifdef HAVE_SSE3
MC_COPY_WTAB(sse3,mmx,mmx,sse3)
-#endif
static const int hpel_ref0[16] = {0,1,1,1,0,1,1,1,2,3,3,3,0,1,1,1};
static const int hpel_ref1[16] = {0,0,0,0,2,2,3,2,2,2,3,2,2,2,3,2};
#endif
MC_LUMA(sse2,sse2,sse2)
MC_LUMA(cache64_sse2,cache64_sse2,sse2)
-#ifdef HAVE_SSE3
MC_LUMA(cache64_sse3,cache64_sse3,sse3)
-#endif
#define GET_REF(name)\
uint8_t *get_ref_##name( uint8_t *dst, int *i_dst_stride,\
#endif
GET_REF(sse2)
GET_REF(cache64_sse2)
-#ifdef HAVE_SSE3
GET_REF(cache64_sse3)
-#endif
#define HPEL(align, cpu, cpuv, cpuc, cpuh)\
void x264_hpel_filter_v_##cpuv( uint8_t *dst, uint8_t *src, int16_t *buf, int stride, int width);\
HPEL(8, mmxext, mmxext, mmxext, mmxext)
HPEL(16, sse2_amd, mmxext, mmxext, sse2)
HPEL(16, sse2, sse2, sse2, sse2)
-#ifdef HAVE_SSE3
HPEL(16, ssse3, sse2, ssse3, sse2)
-#endif
void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
{
{
pf->mc_luma = mc_luma_cache64_sse2;
pf->get_ref = get_ref_cache64_sse2;
-#ifdef HAVE_SSE3
/* on Core 2, lddqu behaves just like movdqu, so the lddqu-based cacheline-split
 * code only helps CPUs that have SSE3 but not SSSE3 (i.e. Prescott-era P4s) */
if( (cpu&X264_CPU_SSE3) && !(cpu&X264_CPU_SSSE3) )
{
pf->mc_luma = mc_luma_cache64_sse3;
pf->get_ref = get_ref_cache64_sse3;
}
-#endif
}
if( !(cpu&X264_CPU_SSSE3) )
return;
-#ifdef HAVE_SSE3
pf->hpel_filter = x264_hpel_filter_ssse3;
-#endif
}
SA8D_16x16_32 sse2
INTRA_SA8D_SSE2 sse2
INTRA_SATDS_MMX mmxext
-%ifdef HAVE_SSE3
%define ABS1 ABS1_SSSE3
%define ABS2 ABS2_SSSE3
SATDS_SSE2 ssse3
INTRA_SA8D_SSE2 ssse3
INTRA_SATDS_MMX ssse3
SATD_W4 ssse3 ; mmx, but uses pabsw from ssse3.
-%endif
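; (ABS1_SSSE3/ABS2_SSSE3 presumably reduce to single pabsw instructions, replacing
;  the multi-instruction absolute-value sequences used by the mmx/sse2 macros)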
%endmacro
ADS_SSE2 sse2
-%ifdef HAVE_SSE3
%define ABS1 ABS1_SSSE3
ADS_SSE2 ssse3
-%endif
; int x264_pixel_ads_mvs( int16_t *mvs, uint8_t *masks, int width )
; {
#ifdef ARCH_X86_64
INTRA_SA8D_X3(sse2)
-#ifdef HAVE_SSE3
INTRA_SA8D_X3(ssse3)
-#endif
#else
INTRA_SA8D_X3(mmxext)
#endif
QUANT_AC x264_quant_4x4_sse2, QUANT_MMX, 2, 16
QUANT_AC x264_quant_8x8_sse2, QUANT_MMX, 8, 16
-%ifdef HAVE_SSE3
QUANT_DC x264_quant_4x4_dc_ssse3, QUANT_SSSE3, 2, 16
QUANT_AC x264_quant_4x4_ssse3, QUANT_SSSE3, 2, 16
QUANT_AC x264_quant_8x8_ssse3, QUANT_SSSE3, 8, 16
-%endif
%include "x86inc.asm"
SECTION_RODATA
-sw_64: dq 64
+sw_64: dd 64
SECTION .text
%endmacro
SAD_W16 sse2
-%ifdef HAVE_SSE3
%define movdqu lddqu
SAD_W16 sse3
%undef movdqu
-%endif
SAD_X_SSE2 4, 16, 16, sse2
SAD_X_SSE2 4, 16, 8, sse2
-%ifdef HAVE_SSE3
%define movdqu lddqu
SAD_X_SSE2 3, 16, 16, sse3
SAD_X_SSE2 3, 16, 8, sse3
SAD_X_SSE2 4, 16, 16, sse3
SAD_X_SSE2 4, 16, 8, sse3
%undef movdqu
-%endif
SADX34_CACHELINE_FUNC 16, 8, 64, sse2, sse2
%endif ; !ARCH_X86_64
-%ifdef HAVE_SSE3
SAD16_CACHELINE_FUNC ssse3, 8
SAD16_CACHELINE_FUNC ssse3, 16
%assign i 1
%endrep
SADX34_CACHELINE_FUNC 16, 16, 64, sse2, ssse3
SADX34_CACHELINE_FUNC 16, 8, 64, sse2, ssse3
-%endif ; HAVE_SSE3
echo "yasm prior to 0.6.2 miscompiles PIC. trying nasm instead..."
AS=nasm
fi
- if as_check ; then
+ if as_check "pabsw xmm0, xmm0" ; then
CFLAGS="$CFLAGS -DHAVE_MMX"
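# pabsw is an SSSE3 instruction, so an assembler that accepts it can build all of
# the x86 asm, SSSE3 routines included; a single HAVE_MMX define is then sufficient.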
- if as_check "pabsw xmm0, xmm0" ; then
- ASFLAGS="$ASFLAGS -DHAVE_SSE3"
- CFLAGS="$CFLAGS -DHAVE_SSE3"
- fi
else
echo "No suitable assembler found. x264 will be several times slower."
echo "Please install 'yasm' to get MMX/SSE optimized code."