From: Gabriel Bouvigne Date: Fri, 27 Jun 2008 03:09:55 +0000 (-0600) Subject: some asm functions require aligned stack. disable these when compiling with msvc... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ed9a9f313240c887a7a3b330ceabe25fccbf47db;p=libx264 some asm functions require aligned stack. disable these when compiling with msvc/icc. --- diff --git a/common/cpu.c b/common/cpu.c index ed72c649..f2b5f037 100644 --- a/common/cpu.c +++ b/common/cpu.c @@ -51,6 +51,7 @@ const x264_cpu_name_t x264_cpu_names[] = { {"SSE4", X264_CPU_MMX|X264_CPU_MMXEXT|X264_CPU_SSE|X264_CPU_SSE2|X264_CPU_SSE3|X264_CPU_SSSE3|X264_CPU_SSE4}, {"Cache32", X264_CPU_CACHELINE_32}, {"Cache64", X264_CPU_CACHELINE_64}, + {"Slow_mod4_stack", X264_CPU_STACK_MOD4}, {"", 0}, }; @@ -172,6 +173,10 @@ uint32_t x264_cpu_detect( void ) fprintf( stderr, "x264 [warning]: unable to determine cacheline size\n" ); } +#ifdef BROKEN_STACK_ALIGNMENT + cpu |= X264_CPU_STACK_MOD4; +#endif + return cpu; } diff --git a/common/cpu.h b/common/cpu.h index 1871e3a2..30e966ab 100644 --- a/common/cpu.h +++ b/common/cpu.h @@ -43,7 +43,7 @@ void x264_stack_align( void (*func)(x264_t*), x264_t *arg ); #endif typedef struct { - const char name[12]; + const char name[16]; int flags; } x264_cpu_name_t; extern const x264_cpu_name_t x264_cpu_names[]; diff --git a/common/frame.c b/common/frame.c index 0c7a56c5..bdef1616 100644 --- a/common/frame.c +++ b/common/frame.c @@ -795,7 +795,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf ) pf->deblock_v_luma_intra = x264_deblock_v_luma_intra_mmxext; pf->deblock_h_luma_intra = x264_deblock_h_luma_intra_mmxext; #endif - if( cpu&X264_CPU_SSE2 ) + if( (cpu&X264_CPU_SSE2) && !(cpu&X264_CPU_STACK_MOD4) ) { pf->deblock_v_luma = x264_deblock_v_luma_sse2; pf->deblock_h_luma = x264_deblock_h_luma_sse2; diff --git a/common/osdep.h b/common/osdep.h index 939fe780..36685bec 100644 --- a/common/osdep.h +++ b/common/osdep.h @@ -133,4 +133,10 @@ /* FIXME: long isn't always the native register size (e.g. win64). */ #define WORD_SIZE sizeof(long) +#if !defined(_WIN64) && !defined(__LP64__) +#if defined(_MSC_VER) || defined(__INTEL_COMPILER) +#define BROKEN_STACK_ALIGNMENT /* define it if stack is not mod16 */ +#endif +#endif + #endif /* X264_OSDEP_H */ diff --git a/common/x86/mc-c.c b/common/x86/mc-c.c index 658cccfb..75ce420d 100644 --- a/common/x86/mc-c.c +++ b/common/x86/mc-c.c @@ -299,11 +299,14 @@ void x264_mc_init_mmx( int cpu, x264_mc_functions_t *pf ) pf->copy[PIXEL_16x16] = x264_mc_copy_w16_aligned_sse2; pf->avg[PIXEL_16x16] = x264_pixel_avg_16x16_sse2; pf->avg[PIXEL_16x8] = x264_pixel_avg_16x8_sse2; - pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2; - pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_sse2; - pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_sse2; - pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_sse2; - pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_sse2; + if( !(cpu&X264_CPU_STACK_MOD4) ) + { + pf->avg_weight[PIXEL_16x16] = x264_pixel_avg_weight_16x16_sse2; + pf->avg_weight[PIXEL_16x8] = x264_pixel_avg_weight_16x8_sse2; + pf->avg_weight[PIXEL_8x16] = x264_pixel_avg_weight_8x16_sse2; + pf->avg_weight[PIXEL_8x8] = x264_pixel_avg_weight_8x8_sse2; + pf->avg_weight[PIXEL_8x4] = x264_pixel_avg_weight_8x4_sse2; + } pf->hpel_filter = x264_hpel_filter_sse2; pf->mc_chroma = x264_mc_chroma_sse2; diff --git a/x264.h b/x264.h index 0e257a19..838d5726 100644 --- a/x264.h +++ b/x264.h @@ -59,6 +59,7 @@ typedef struct x264_t x264_t; #define X264_CPU_SSSE3 0x000400 #define X264_CPU_PHADD_IS_FAST 0x000800 /* pre-Penryn Core2 have a uselessly slow PHADD instruction */ #define X264_CPU_SSE4 0x001000 /* SSE4.1 */ +#define X264_CPU_STACK_MOD4 0x002000 /* if stack is only mod4 and not mod16 */ /* Analyse flags */