granicus.if.org Git - libx264/commitdiff
Some asm functions require an aligned stack. Disable these when compiling with MSVC/ICC.
author Gabriel Bouvigne <gabriel.bouvigne@joost.com>
Fri, 27 Jun 2008 03:09:55 +0000 (21:09 -0600)
committer Loren Merritt <pengvado@akuvian.org>
Sun, 29 Jun 2008 05:40:16 +0000 (23:40 -0600)
common/cpu.c
common/cpu.h
common/frame.c
common/osdep.h
common/x86/mc-c.c
x264.h

index ed72c6496c24a5248525d2a09cb05c40bc5035d6..f2b5f037d451eb7a3712a89c749f87a8dabed817 100644 (file)
@@ -51,6 +51,7 @@ const x264_cpu_name_t x264_cpu_names[] = {
     {"SSE4",    X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4},
     {"Cache32", X264_CPU_CACHELINE_32},
     {"Cache64", X264_CPU_CACHELINE_64},
+    {"Slow_mod4_stack", X264_CPU_STACK_MOD4},
     {"", 0},
 };
 
@@ -172,6 +173,10 @@ uint32_t x264_cpu_detect( void )
             fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" );
     }
 
+#ifdef BROKEN_STACK_ALIGNMENT
+    cpu |= X264_CPU_STACK_MOD4;
+#endif
+
     return cpu;
 }
 
index 1871e3a2af2dcce257972725b67846c388662582..30e966ab29c1b1a19fdb14d292438100600ceffb 100644 (file)
@@ -43,7 +43,7 @@ void x264_stack_align( void (*func)(x264_t*), x264_t *arg );
 #endif
 
 typedef struct {
-    const char name[12];
+    const char name[16];
     int flags;
 } x264_cpu_name_t;
 extern const x264_cpu_name_t x264_cpu_names[];
index 0c7a56c5b17d581e4160963a1e60a8ccc269a5a7..bdef1616fbdce6c4fd7e793a6d5f94fca59f39fa 100644 (file)
@@ -795,7 +795,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf )
         pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext;
         pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext;
 #endif
-        if( cpu&X264_CPU_SSE2 )
+        if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) )
         {
             pf->deblock_v_luma = x264_deblock_v_luma_sse2;
             pf->deblock_h_luma = x264_deblock_h_luma_sse2;
index 939fe7809936e5986173032d788689e51d057951..36685bec5e2f28ee106cb0fdc042562e10be5568 100644 (file)
 /* FIXME: long isn't always the native register size (e.g. win64). */
 #define WORD_SIZE sizeof(long)
 
+#if !defined(_WIN64) && !defined(__LP64__)
+#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
+#define BROKEN_STACK_ALIGNMENT /* define it if stack is not mod16 */
+#endif
+#endif
+
 #endif /* X264_OSDEP_H */
index 658cccfbad05cfc1f42e474d81dc500c3d0a5474..75ce420d4ad373551962230c8e4ea1d4225e8f7e 100644 (file)
@@ -299,11 +299,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf )
     pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2;
     pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2;
     pf->avg[PIXEL_16x8]  = x264_pixel_avg_16x8_sse2;
-    pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2;
-    pf->avg_weight[PIXEL_16x8]  = x264_pixel_avg_weight_16x8_sse2;
-    pf->avg_weight[PIXEL_8x16]  = x264_pixel_avg_weight_8x16_sse2;
-    pf->avg_weight[PIXEL_8x8]   = x264_pixel_avg_weight_8x8_sse2;
-    pf->avg_weight[PIXEL_8x4]   = x264_pixel_avg_weight_8x4_sse2;
+    if( !(cpu&X264_CPU_STACK_MOD4) )
+    {
+        pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2;
+        pf->avg_weight[PIXEL_16x8]  = x264_pixel_avg_weight_16x8_sse2;
+        pf->avg_weight[PIXEL_8x16]  = x264_pixel_avg_weight_8x16_sse2;
+        pf->avg_weight[PIXEL_8x8]   = x264_pixel_avg_weight_8x8_sse2;
+        pf->avg_weight[PIXEL_8x4]   = x264_pixel_avg_weight_8x4_sse2;
+    }
     pf->hpel_filter = x264_hpel_filter_sse2;
     pf->mc_chroma = x264_mc_chroma_sse2;
 
diff --git a/x264.h b/x264.h
index 0e257a1939df573e6a45d9287e18d36e46a4b86d..838d57267bf86d68a626485b6c740586b926fc3f 100644 (file)
--- a/x264.h
+++ b/x264.h
@@ -59,6 +59,7 @@ typedef struct x264_t x264_t;
 #define X264_CPU_SSSE3          0x000400
 #define X264_CPU_PHADD_IS_FAST  0x000800  /* pre-Penryn Core2 have a uselessly slow PHADD instruction */
 #define X264_CPU_SSE4           0x001000  /* SSE4.1 */
+#define X264_CPU_STACK_MOD4     0x002000  /* if stack is only mod4 and not mod16 */
 
 /* Analyse flags
  */