From: Anton Mitrofanov Date: Tue, 29 Nov 2011 21:45:13 +0000 (-0800) Subject: checkasm: shut up gcc warnings, fix some naming of functions in results X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5ebbcd8748ae8d8b184db5a8f9b46a9ad865f0ae;p=libx264 checkasm: shut up gcc warnings, fix some naming of functions in results --- diff --git a/tools/checkasm.c b/tools/checkasm.c index c83b5991..1f491cfa 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -55,7 +55,7 @@ int quiet = 0; #define BENCH_RUNS 100 // tradeoff between accuracy and speed #define BENCH_ALIGNS 16 // number of stack+heap data alignments (another accuracy vs speed tradeoff) #define MAX_FUNCS 1000 // just has to be big enough to hold all the existing functions -#define MAX_CPUS 10 // number of different combinations of cpu flags +#define MAX_CPUS 30 // number of different combinations of cpu flags typedef struct { @@ -168,11 +168,10 @@ static void print_bench(void) b->cpu&X264_CPU_XOP ? "xop" : b->cpu&X264_CPU_AVX ? "avx" : b->cpu&X264_CPU_SSE4 ? "sse4" : - b->cpu&X264_CPU_SHUFFLE_IS_FAST ? "fastshuffle" : b->cpu&X264_CPU_SSSE3 ? "ssse3" : b->cpu&X264_CPU_SSE3 ? "sse3" : /* print sse2slow only if there's also a sse2fast version of the same func */ - b->cpu&X264_CPU_SSE2_IS_SLOW && j<MAX_CPUS && b[1].cpu&X264_CPU_SSE2_IS_FAST && !(b[1].cpu&X264_CPU_SSE3) ? "sse2slow" : + b->cpu&X264_CPU_SSE2_IS_SLOW && j<MAX_CPUS-1 && b[1].cpu&X264_CPU_SSE2_IS_FAST && !(b[1].cpu&X264_CPU_SSE3) ? "sse2slow" : b->cpu&X264_CPU_SSE2 ? "sse2" : b->cpu&X264_CPU_MMX ? "mmx" : b->cpu&X264_CPU_ALTIVEC ? "altivec" : @@ -180,6 +179,7 @@ static void print_bench(void) b->cpu&X264_CPU_ARMV6 ? "armv6" : "c", b->cpu&X264_CPU_CACHELINE_32 ? "_c32" : b->cpu&X264_CPU_CACHELINE_64 ? "_c64" : + b->cpu&X264_CPU_SHUFFLE_IS_FAST && !(b->cpu&X264_CPU_SSE4) ? "_fastshuffle" : b->cpu&X264_CPU_SSE_MISALIGN ? "_misalign" : b->cpu&X264_CPU_LZCNT ? "_lzcnt" : b->cpu&X264_CPU_FAST_NEON_MRC ? 
"_fast_mrc" : @@ -651,7 +651,8 @@ static int check_pixel( int cpu_ref, int cpu_new ) { ALIGNED_16( uint16_t sums[72] ); ALIGNED_16( int dc[4] ); - int16_t mvs_a[32], mvs_c[32]; + ALIGNED_16( int16_t mvs_a[32] ); + ALIGNED_16( int16_t mvs_c[32] ); int mvn_a, mvn_c; int thresh = rand() & 0x3fff; set_func_name( "esa_ads" ); @@ -2295,6 +2296,9 @@ static int add_flags( int *cpu_ref, int *cpu_new, int flags, const char *name ) { *cpu_ref = *cpu_new; *cpu_new |= flags; +#if BROKEN_STACK_ALIGNMENT + *cpu_new |= X264_CPU_STACK_MOD4; +#endif if( *cpu_new & X264_CPU_SSE2_IS_FAST ) *cpu_new &= ~X264_CPU_SSE2_IS_SLOW; if( !quiet ) @@ -2329,6 +2333,7 @@ static int check_all_flags( void ) ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE | X264_CPU_SSE2 | X264_CPU_SSE2_IS_SLOW, "SSE2Slow" ); ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE2_IS_FAST, "SSE2Fast" ); ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSE2Fast Cache64" ); + cpu1 &= ~X264_CPU_CACHELINE_64; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SHUFFLE_IS_FAST, "SSE2 FastShuffle" ); cpu1 &= ~X264_CPU_SHUFFLE_IS_FAST; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSE2 SlowCTZ" ); @@ -2338,23 +2343,24 @@ static int check_all_flags( void ) } if( x264_cpu_detect() & X264_CPU_SSE_MISALIGN ) { - cpu1 &= ~X264_CPU_CACHELINE_64; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE_MISALIGN, "SSE_Misalign" ); cpu1 &= ~X264_CPU_SSE_MISALIGN; } if( x264_cpu_detect() & X264_CPU_LZCNT ) { - cpu1 &= ~X264_CPU_CACHELINE_64; ret |= add_flags( &cpu0, &cpu1, X264_CPU_LZCNT, "SSE_LZCNT" ); cpu1 &= ~X264_CPU_LZCNT; } if( x264_cpu_detect() & X264_CPU_SSE3 ) + { ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE3 | X264_CPU_CACHELINE_64, "SSE3" ); + cpu1 &= ~X264_CPU_CACHELINE_64; + } if( x264_cpu_detect() & X264_CPU_SSSE3 ) { - cpu1 &= ~X264_CPU_CACHELINE_64; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSSE3, "SSSE3" ); ret |= add_flags( &cpu0, &cpu1, X264_CPU_CACHELINE_64, "SSSE3 Cache64" ); + cpu1 &= ~X264_CPU_CACHELINE_64; ret |= add_flags( &cpu0, 
&cpu1, X264_CPU_SHUFFLE_IS_FAST, "SSSE3 FastShuffle" ); cpu1 &= ~X264_CPU_SHUFFLE_IS_FAST; ret |= add_flags( &cpu0, &cpu1, X264_CPU_SLOW_CTZ, "SSSE3 SlowCTZ" ); @@ -2363,10 +2369,7 @@ static int check_all_flags( void ) cpu1 &= ~X264_CPU_SLOW_ATOM; } if( x264_cpu_detect() & X264_CPU_SSE4 ) - { - cpu1 &= ~X264_CPU_CACHELINE_64; - ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4, "SSE4" ); - } + ret |= add_flags( &cpu0, &cpu1, X264_CPU_SSE4 | X264_CPU_SHUFFLE_IS_FAST, "SSE4" ); if( x264_cpu_detect() & X264_CPU_AVX ) ret |= add_flags( &cpu0, &cpu1, X264_CPU_AVX, "AVX" ); if( x264_cpu_detect() & X264_CPU_XOP )