From 40bb56814e56ed342040bdbf30258aab39ee9e89 Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Mon, 15 Dec 2014 18:49:23 +0300 Subject: [PATCH] x86: Update intel compiler cpu dispatcher override for new versions of ICC/ICL --- common/cpu.h | 1 - common/osdep.c | 45 ----------------------------------- common/x86/cpu-a.asm | 50 --------------------------------------- configure | 8 ++++++- encoder/encoder.c | 7 ++++++ extras/intel_dispatcher.h | 46 +++++++++++++++++++++++++++++++++++ 6 files changed, 60 insertions(+), 97 deletions(-) create mode 100644 extras/intel_dispatcher.h diff --git a/common/cpu.h b/common/cpu.h index 6d08027a..07e5c6cb 100644 --- a/common/cpu.h +++ b/common/cpu.h @@ -45,7 +45,6 @@ void x264_cpu_sfence( void ); #define x264_emms() #endif #define x264_sfence x264_cpu_sfence -void x264_safe_intel_cpu_indicator_init( void ); /* kludge: * gcc can't give variables any greater alignment than the stack frame has. diff --git a/common/osdep.c b/common/osdep.c index e97aaeda..91f3fdd7 100644 --- a/common/osdep.c +++ b/common/osdep.c @@ -94,51 +94,6 @@ int x264_threading_init( void ) } #endif -#if HAVE_MMX -#ifdef __INTEL_COMPILER -/* Agner's patch to Intel's CPU dispatcher from pages 131-132 of - * http://agner.org/optimize/optimizing_cpp.pdf (2011-01-30) - * adapted to x264's cpu schema. 
*/ - -// Global variable indicating cpu -int __intel_cpu_indicator = 0; -// CPU dispatcher function -void x264_intel_cpu_indicator_init( void ) -{ - unsigned int cpu = x264_cpu_detect(); - if( cpu&X264_CPU_AVX ) - __intel_cpu_indicator = 0x20000; - else if( cpu&X264_CPU_SSE42 ) - __intel_cpu_indicator = 0x8000; - else if( cpu&X264_CPU_SSE4 ) - __intel_cpu_indicator = 0x2000; - else if( cpu&X264_CPU_SSSE3 ) - __intel_cpu_indicator = 0x1000; - else if( cpu&X264_CPU_SSE3 ) - __intel_cpu_indicator = 0x800; - else if( cpu&X264_CPU_SSE2 && !(cpu&X264_CPU_SSE2_IS_SLOW) ) - __intel_cpu_indicator = 0x200; - else if( cpu&X264_CPU_SSE ) - __intel_cpu_indicator = 0x80; - else if( cpu&X264_CPU_MMX2 ) - __intel_cpu_indicator = 8; - else - __intel_cpu_indicator = 1; -} - -/* __intel_cpu_indicator_init appears to have a non-standard calling convention that - * assumes certain registers aren't preserved, so we'll route it through a function - * that backs up all the registers. */ -void __intel_cpu_indicator_init( void ) -{ - x264_safe_intel_cpu_indicator_init(); -} -#else -void x264_intel_cpu_indicator_init( void ) -{} -#endif -#endif - #ifdef _WIN32 /* Functions for dealing with Unicode on Windows. 
*/ FILE *x264_fopen( const char *filename, const char *mode ) diff --git a/common/x86/cpu-a.asm b/common/x86/cpu-a.asm index bcf6c434..4dfd7754 100644 --- a/common/x86/cpu-a.asm +++ b/common/x86/cpu-a.asm @@ -145,53 +145,3 @@ cglobal cpu_emms cglobal cpu_sfence sfence ret - -cextern intel_cpu_indicator_init - -;----------------------------------------------------------------------------- -; void safe_intel_cpu_indicator_init( void ); -;----------------------------------------------------------------------------- -cglobal safe_intel_cpu_indicator_init - push r0 - push r1 - push r2 - push r3 - push r4 - push r5 - push r6 -%if ARCH_X86_64 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 -%endif - push rbp - mov rbp, rsp -%if WIN64 - sub rsp, 32 ; shadow space -%endif - and rsp, ~31 - call intel_cpu_indicator_init - leave -%if ARCH_X86_64 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop r8 - pop r7 -%endif - pop r6 - pop r5 - pop r4 - pop r3 - pop r2 - pop r1 - pop r0 - ret diff --git a/configure b/configure index 02fe4be3..e2977bdf 100755 --- a/configure +++ b/configure @@ -311,7 +311,7 @@ NL=" # list of all preprocessor HAVE values we can define CONFIG_HAVE="MALLOC_H ALTIVEC ALTIVEC_H MMX ARMV6 ARMV6T2 NEON BEOSTHREAD POSIXTHREAD WIN32THREAD THREAD LOG2F SWSCALE \ - LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC" + LAVF FFMS GPAC AVS GPL VECTOREXT INTERLACED CPU_COUNT OPENCL THP LSMASH X86_INLINE_ASM AS_FUNC INTEL_DISPATCHER" # parse options @@ -1093,6 +1093,12 @@ if cc_check '' -Wmaybe-uninitialized ; then CFLAGS="-Wno-maybe-uninitialized $CFLAGS" fi +if [ $compiler = ICC -o $compiler = ICL ] ; then + if cc_check 'extras/intel_dispatcher.h' '' 'x264_intel_dispatcher_override();' ; then + define HAVE_INTEL_DISPATCHER + fi +fi + if [ "$bit_depth" -gt "8" ]; then define HIGH_BIT_DEPTH ASFLAGS="$ASFLAGS -DHIGH_BIT_DEPTH=1" diff --git a/encoder/encoder.c 
b/encoder/encoder.c index c98a900e..54d2e5a8 100644 --- a/encoder/encoder.c +++ b/encoder/encoder.c @@ -32,6 +32,9 @@ #include "ratecontrol.h" #include "macroblock.h" #include "me.h" +#if HAVE_INTEL_DISPATCHER +#include "extras/intel_dispatcher.h" +#endif //#define DEBUG_MB_TYPE @@ -1390,6 +1393,10 @@ x264_t *x264_encoder_open( x264_param_t *param ) if( param->param_free ) param->param_free( param ); +#if HAVE_INTEL_DISPATCHER + x264_intel_dispatcher_override(); +#endif + if( x264_threading_init() ) { x264_log( h, X264_LOG_ERROR, "unable to initialize threading\n" ); diff --git a/extras/intel_dispatcher.h b/extras/intel_dispatcher.h new file mode 100644 index 00000000..8837c626 --- /dev/null +++ b/extras/intel_dispatcher.h @@ -0,0 +1,46 @@ +/***************************************************************************** + * intel_dispatcher.h: intel compiler cpu dispatcher override + ***************************************************************************** + * Copyright (C) 2014 x264 project + * + * Authors: Anton Mitrofanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA. + * + * This program is also available under a commercial proprietary license. + * For more information, contact us at licensing@x264.com. 
+ *****************************************************************************/
+
+#ifndef X264_INTEL_DISPATCHER_H
+#define X264_INTEL_DISPATCHER_H
+
+/* Feature flags using _FEATURE_* defines from immintrin.h */
+extern unsigned long long __intel_cpu_feature_indicator;
+extern unsigned long long __intel_cpu_feature_indicator_x;
+
+/* CPU vendor independent version of dispatcher */
+void __intel_cpu_features_init_x( void );
+/* Replace the feature indicator with the vendor-independent one, unless bits other than bit 0 are already set. */
+static void x264_intel_dispatcher_override( void )
+{
+    if( __intel_cpu_feature_indicator & ~1ULL ) /* some bit other than bit 0 is set: leave the indicator untouched */
+        return;
+    __intel_cpu_feature_indicator = 0; /* reset both indicators before calling the initializer */
+    __intel_cpu_feature_indicator_x = 0;
+    __intel_cpu_features_init_x(); /* NOTE(review): presumably fills __intel_cpu_feature_indicator_x - confirm against the ICC runtime */
+    __intel_cpu_feature_indicator = __intel_cpu_feature_indicator_x; /* copy the _x value into the main indicator */
+}
+
+#endif /* X264_INTEL_DISPATCHER_H */
-- 
2.40.0