From: Anton Mitrofanov Date: Thu, 31 Oct 2019 23:45:39 +0000 (+0300) Subject: aarch64: Fix compilation with disabled asm X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7114174b23b1764b8f4b58ae9d0f8a422748df0f;p=libx264 aarch64: Fix compilation with disabled asm --- diff --git a/common/bitstream.c b/common/bitstream.c index bd7e7e75..177a2c29 100644 --- a/common/bitstream.c +++ b/common/bitstream.c @@ -45,7 +45,7 @@ static uint8_t *nal_escape_c( uint8_t *dst, uint8_t *src, uint8_t *end ) #if HAVE_ARMV6 #include "arm/bitstream.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/bitstream.h" #endif @@ -159,7 +159,7 @@ void x264_bitstream_init( int cpu, x264_bitstream_function_t *pf ) if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) pf->nal_escape = x264_nal_escape_neon; #endif diff --git a/common/cabac.h b/common/cabac.h index 8b4d9301..bb822104 100644 --- a/common/cabac.h +++ b/common/cabac.h @@ -80,7 +80,7 @@ void x264_cabac_encode_flush( x264_t *h, x264_cabac_t *cb ); #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 #define x264_cabac_encode_decision x264_cabac_encode_decision_asm #define x264_cabac_encode_bypass x264_cabac_encode_bypass_asm #define x264_cabac_encode_terminal x264_cabac_encode_terminal_asm diff --git a/common/cpu.c b/common/cpu.c index 063d148e..40c4628f 100644 --- a/common/cpu.c +++ b/common/cpu.c @@ -45,7 +45,7 @@ const x264_cpu_name_t x264_cpu_names[] = { -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 // {"MMX", X264_CPU_MMX}, // we don't support asm on mmx1 cpus anymore #define MMX2 X264_CPU_MMX|X264_CPU_MMX2 {"MMX2", MMX2}, @@ -97,7 +97,7 @@ const x264_cpu_name_t x264_cpu_names[] = {"", 0}, }; -#if (ARCH_PPC && SYS_LINUX) || (ARCH_ARM && 
!HAVE_NEON) +#if (HAVE_ALTIVEC && SYS_LINUX) || (HAVE_ARMV6 && !HAVE_NEON) #include <signal.h> #include <setjmp.h> static sigjmp_buf jmpbuf; @@ -298,7 +298,7 @@ uint32_t x264_cpu_detect( void ) return cpu; } -#elif ARCH_PPC && HAVE_ALTIVEC +#elif HAVE_ALTIVEC #if SYS_MACOSX || SYS_OPENBSD || SYS_FREEBSD #include <sys/sysctl.h> @@ -355,7 +355,7 @@ uint32_t x264_cpu_detect( void ) } #endif -#elif ARCH_ARM +#elif HAVE_ARMV6 void x264_cpu_neon_test( void ); int x264_cpu_fast_neon_mrc_test( void ); @@ -363,7 +363,6 @@ int x264_cpu_fast_neon_mrc_test( void ); uint32_t x264_cpu_detect( void ) { int flags = 0; -#if HAVE_ARMV6 flags |= X264_CPU_ARMV6; // don't do this hack if compiled with -mfpu=neon @@ -396,26 +395,21 @@ uint32_t x264_cpu_detect( void ) flags |= x264_cpu_fast_neon_mrc_test() ? X264_CPU_FAST_NEON_MRC : 0; #endif // TODO: write dual issue test? currently it's A8 (dual issue) vs. A9 (fast mrc) -#endif return flags; } -#elif ARCH_AARCH64 +#elif HAVE_AARCH64 uint32_t x264_cpu_detect( void ) { return X264_CPU_ARMV8 | X264_CPU_NEON; } -#elif ARCH_MIPS +#elif HAVE_MSA uint32_t x264_cpu_detect( void ) { - uint32_t flags = 0; -#if HAVE_MSA - flags |= X264_CPU_MSA; -#endif - return flags; + return X264_CPU_MSA; } #else diff --git a/common/dct.c b/common/dct.c index 5d1ea75f..83b4e720 100644 --- a/common/dct.c +++ b/common/dct.c @@ -29,16 +29,16 @@ #if HAVE_MMX # include "x86/dct.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/dct.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/dct.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/dct.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/dct.h" #endif @@ -682,7 +682,7 @@ void x264_dct_init( int cpu, x264_dct_function_t *dctf ) } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { dctf->sub4x4_dct = x264_sub4x4_dct_neon; @@ -996,11 +996,11 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig pf_progressive->scan_8x8 =
x264_zigzag_scan_8x8_frame_altivec; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_progressive->scan_4x4 = x264_zigzag_scan_4x4_frame_neon; -#if ARCH_AARCH64 +#if HAVE_AARCH64 pf_interlaced->scan_4x4 = x264_zigzag_scan_4x4_field_neon; pf_interlaced->scan_8x8 = x264_zigzag_scan_8x8_field_neon; pf_interlaced->sub_4x4 = x264_zigzag_sub_4x4_field_neon; @@ -1010,9 +1010,9 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig pf_progressive->sub_4x4 = x264_zigzag_sub_4x4_frame_neon; pf_progressive->sub_4x4ac = x264_zigzag_sub_4x4ac_frame_neon; pf_progressive->sub_8x8 = x264_zigzag_sub_8x8_frame_neon; -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 } -#endif // HAVE_ARMV6 || ARCH_AARCH64 +#endif // HAVE_ARMV6 || HAVE_AARCH64 #endif // HIGH_BIT_DEPTH pf_interlaced->interleave_8x8_cavlc = @@ -1065,13 +1065,13 @@ void x264_zigzag_init( int cpu, x264_zigzag_function_t *pf_progressive, x264_zig #endif // HIGH_BIT_DEPTH #endif #if !HIGH_BIT_DEPTH -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf_interlaced->interleave_8x8_cavlc = pf_progressive->interleave_8x8_cavlc = x264_zigzag_interleave_8x8_cavlc_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_ALTIVEC if( cpu&X264_CPU_ALTIVEC ) diff --git a/common/deblock.c b/common/deblock.c index f887b51a..13816d33 100644 --- a/common/deblock.c +++ b/common/deblock.c @@ -667,13 +667,13 @@ void x264_macroblock_deblock( x264_t *h ) #if HAVE_MMX #include "x86/deblock.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC #include "ppc/deblock.h" #endif #if HAVE_ARMV6 #include "arm/deblock.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/deblock.h" #endif #if HAVE_MSA @@ -782,7 +782,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff ) } #endif // HAVE_ALTIVEC -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->deblock_luma[1] = x264_deblock_v_luma_neon; 
diff --git a/common/mc.c b/common/mc.c index f2a49a9c..15e9cbb7 100644 --- a/common/mc.c +++ b/common/mc.c @@ -29,16 +29,16 @@ #if HAVE_MMX #include "x86/mc.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC #include "ppc/mc.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 #include "arm/mc.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 #include "aarch64/mc.h" #endif -#if ARCH_MIPS +#if HAVE_MSA #include "mips/mc.h" #endif @@ -680,7 +680,7 @@ void x264_mc_init( int cpu, x264_mc_functions_t *pf, int cpu_independent ) #if HAVE_ARMV6 x264_mc_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_mc_init_aarch64( cpu, pf ); #endif #if HAVE_MSA diff --git a/common/pixel.c b/common/pixel.c index d1f61551..83e94ab3 100644 --- a/common/pixel.c +++ b/common/pixel.c @@ -31,18 +31,18 @@ # include "x86/pixel.h" # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/pixel.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/pixel.h" # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/pixel.h" # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/pixel.h" #endif @@ -508,7 +508,7 @@ SATD_X_DECL7( _avx512 ) #endif #if !HIGH_BIT_DEPTH -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 SATD_X_DECL7( _neon ) #endif #endif // !HIGH_BIT_DEPTH @@ -532,7 +532,7 @@ INTRA_MBCMP_8x8(sa8d,, _c ) INTRA_MBCMP_8x8( sad, _mmx2, _c ) INTRA_MBCMP_8x8(sa8d, _sse2, _sse2 ) #endif -#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || ARCH_AARCH64) +#if !HIGH_BIT_DEPTH && (HAVE_ARMV6 || HAVE_AARCH64) INTRA_MBCMP_8x8( sad, _neon, _neon ) INTRA_MBCMP_8x8(sa8d, _neon, _neon ) #endif @@ -602,7 +602,7 @@ INTRA_MBCMP(satd, 8x16, dc, h, v, c, _neon, _c ) INTRA_MBCMP( sad, 16x16, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 16x16, v, h, dc, , _neon, _neon ) #endif -#if !HIGH_BIT_DEPTH && ARCH_AARCH64 +#if !HIGH_BIT_DEPTH && HAVE_AARCH64 INTRA_MBCMP( sad, 4x4, v, h, dc, , _neon, _neon ) INTRA_MBCMP(satd, 4x4, v, h, dc, , _neon, 
_neon ) INTRA_MBCMP( sad, 8x8, dc, h, v, c, _neon, _neon ) @@ -1434,7 +1434,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { INIT8( sad, _neon ); @@ -1475,7 +1475,7 @@ void x264_pixel_init( int cpu, x264_pixel_function_t *pixf ) pixf->ssim_4x4x2_core = x264_pixel_ssim_4x4x2_core_neon; pixf->ssim_end4 = x264_pixel_ssim_end4_neon; } -#endif // ARCH_AARCH64 +#endif // HAVE_AARCH64 #if HAVE_MSA if( cpu&X264_CPU_MSA ) diff --git a/common/predict.c b/common/predict.c index 3df3f038..7a8bbcda 100644 --- a/common/predict.c +++ b/common/predict.c @@ -34,16 +34,16 @@ #if HAVE_MMX # include "x86/predict.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/predict.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/predict.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/predict.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/predict.h" #endif @@ -906,7 +906,7 @@ void x264_predict_16x16_init( int cpu, x264_predict_t pf[7] ) x264_predict_16x16_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_16x16_init_aarch64( cpu, pf ); #endif @@ -949,7 +949,7 @@ void x264_predict_8x8c_init( int cpu, x264_predict_t pf[7] ) x264_predict_8x8c_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8c_init_aarch64( cpu, pf ); #endif @@ -981,7 +981,7 @@ void x264_predict_8x16c_init( int cpu, x264_predict_t pf[7] ) x264_predict_8x16c_init_arm( cpu, pf ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x16c_init_aarch64( cpu, pf ); #endif } @@ -1010,7 +1010,7 @@ void x264_predict_8x8_init( int cpu, x264_predict8x8_t pf[12], x264_predict_8x8_ x264_predict_8x8_init_arm( cpu, pf, predict_filter ); #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_8x8_init_aarch64( cpu, pf, predict_filter ); #endif @@ -1047,7 +1047,7 @@ void x264_predict_4x4_init( int cpu, x264_predict_t pf[12] ) x264_predict_4x4_init_arm( cpu, pf ); 
#endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 x264_predict_4x4_init_aarch64( cpu, pf ); #endif } diff --git a/common/quant.c b/common/quant.c index e8fc38d6..5d2104f3 100644 --- a/common/quant.c +++ b/common/quant.c @@ -31,16 +31,16 @@ #if HAVE_MMX #include "x86/quant.h" #endif -#if ARCH_PPC +#if HAVE_ALTIVEC # include "ppc/quant.h" #endif -#if ARCH_ARM +#if HAVE_ARMV6 # include "arm/quant.h" #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 # include "aarch64/quant.h" #endif -#if ARCH_MIPS +#if HAVE_MSA # include "mips/quant.h" #endif @@ -756,7 +756,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) pf->coeff_last8 = x264_coeff_last8_arm; } #endif -#if HAVE_ARMV6 || ARCH_AARCH64 +#if HAVE_ARMV6 || HAVE_AARCH64 if( cpu&X264_CPU_NEON ) { pf->quant_2x2_dc = x264_quant_2x2_dc_neon; @@ -776,7 +776,7 @@ void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ) pf->decimate_score64 = x264_decimate_score64_neon; } #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 if( cpu&X264_CPU_ARMV8 ) { pf->coeff_last4 = x264_coeff_last4_aarch64; diff --git a/configure b/configure index f9805dc6..e570dd2d 100755 --- a/configure +++ b/configure @@ -396,7 +396,7 @@ NL=" " # list of all preprocessor HAVE values we can define -CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ +CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON AARCH64 BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER \ MSA MMAP WINRT VSX ARM_INLINE_ASM STRTOK_R CLOCK_GETTIME BITDEPTH8 BITDEPTH10" @@ -953,8 +953,10 @@ fi if [ $asm = auto -a $ARCH = AARCH64 ] ; then if [ $compiler = CL ] && cpp_check '' '' 'defined(_M_ARM64)' ; then + define HAVE_AARCH64 define HAVE_NEON elif cc_check '' '' '__asm__("cmeq v0.8h, v0.8h, #0");' ; then + define HAVE_AARCH64 define HAVE_NEON ASFLAGS="$ASFLAGS -c" else diff 
--git a/tools/checkasm.c b/tools/checkasm.c index 36d73558..49ba11fc 100644 --- a/tools/checkasm.c +++ b/tools/checkasm.c @@ -175,7 +175,7 @@ static void print_bench(void) if( k < j ) continue; printf( "%s_%s%s: %"PRId64"\n", benchs[i].name, -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_AVX512 ? "avx512" : b->cpu&X264_CPU_AVX2 ? "avx2" : b->cpu&X264_CPU_BMI2 ? "bmi2" : @@ -206,7 +206,7 @@ static void print_bench(void) b->cpu&X264_CPU_MSA ? "msa" : #endif "c", -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 b->cpu&X264_CPU_CACHELINE_32 ? "_c32" : b->cpu&X264_CPU_SLOW_ATOM && b->cpu&X264_CPU_CACHELINE_64 ? "_c64_atom" : b->cpu&X264_CPU_CACHELINE_64 ? "_c64" : @@ -229,7 +229,7 @@ static void print_bench(void) static void (*simd_warmup_func)( void ) = NULL; #define simd_warmup() do { if( simd_warmup_func ) simd_warmup_func(); } while( 0 ) -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX int x264_stack_pagealign( int (*func)(), int align ); void x264_checkasm_warmup_avx( void ); void x264_checkasm_warmup_avx512( void ); @@ -241,11 +241,11 @@ intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ); #define x264_stack_pagealign( func, align ) func() #endif -#if ARCH_AARCH64 +#if HAVE_AARCH64 intptr_t x264_checkasm_call( intptr_t (*func)(), int *ok, ... ); #endif -#if ARCH_ARM +#if HAVE_ARMV6 intptr_t x264_checkasm_call_neon( intptr_t (*func)(), int *ok, ... ); intptr_t x264_checkasm_call_noneon( intptr_t (*func)(), int *ok, ... ); intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checkasm_call_noneon; @@ -253,7 +253,7 @@ intptr_t (*x264_checkasm_call)( intptr_t (*func)(), int *ok, ... ) = x264_checka #define call_c1(func,...) func(__VA_ARGS__) -#if ARCH_X86_64 +#if HAVE_MMX && ARCH_X86_64 /* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended to 64-bit. 
* This is done by clobbering the stack with junk around the stack pointer and calling the * assembly function through x264_checkasm_call with added dummy arguments which forces all @@ -269,19 +269,19 @@ void x264_checkasm_stack_clobber( uint64_t clobber, ... ); x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+6 */ \ simd_warmup(); \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_AARCH64 && !defined(__APPLE__) +#elif HAVE_AARCH64 && !defined(__APPLE__) void x264_checkasm_stack_clobber( uint64_t clobber, ... ); #define call_a1(func,...) ({ \ uint64_t r = (rand() & 0xffff) * 0x0001000100010001ULL; \ x264_checkasm_stack_clobber( r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r,r ); /* max_args+8 */ \ x264_checkasm_call(( intptr_t(*)())func, &ok, 0, 0, 0, 0, 0, 0, __VA_ARGS__ ); }) -#elif ARCH_X86 || ARCH_ARM +#elif HAVE_MMX || HAVE_ARMV6 #define call_a1(func,...) x264_checkasm_call( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1 call_c1 #endif -#if ARCH_ARM +#if HAVE_ARMV6 #define call_a1_64(func,...) 
((uint64_t (*)(intptr_t(*)(), int*, ...))x264_checkasm_call)( (intptr_t(*)())func, &ok, __VA_ARGS__ ) #else #define call_a1_64 call_a1 @@ -2601,7 +2601,7 @@ static void run_cabac_terminal_##cpu( x264_t *h, uint8_t *dst )\ DECL_CABAC(c) #if HAVE_MMX DECL_CABAC(asm) -#elif defined(ARCH_AARCH64) +#elif HAVE_AARCH64 DECL_CABAC(asm) #else #define run_cabac_decision_asm run_cabac_decision_c @@ -2807,7 +2807,7 @@ static int check_all_flags( void ) int ret = 0; int cpu0 = 0, cpu1 = 0; uint32_t cpu_detect = x264_cpu_detect(); -#if ARCH_X86 || ARCH_X86_64 +#if HAVE_MMX if( cpu_detect & X264_CPU_AVX512 ) simd_warmup_func = x264_checkasm_warmup_avx512; else if( cpu_detect & X264_CPU_AVX ) @@ -2815,7 +2815,7 @@ static int check_all_flags( void ) #endif simd_warmup(); -#if HAVE_MMX +#if ARCH_X86 || ARCH_X86_64 if( cpu_detect & X264_CPU_MMX2 ) { ret |= add_flags( &cpu0, &cpu1, X264_CPU_MMX | X264_CPU_MMX2, "MMX" );