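AVX instructions raise SIGILL on operating systems that do not support AVX, even when no ymm registers are used.
AVX detection therefore has to check for OS support in addition to the CPUID feature bit.
On Windows, AVX is only available on Windows 7 SP1 or later.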
#if HAVE_MMX
int x264_cpu_cpuid_test( void );
-uint32_t x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
+void x264_cpu_cpuid( uint32_t op, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx );
+void x264_cpu_xgetbv( uint32_t op, int *eax, int *edx );
uint32_t x264_cpu_detect( void )
{
cpu |= X264_CPU_SSE4;
if( ecx&0x00100000 )
cpu |= X264_CPU_SSE42;
- if( ecx&0x10000000 )
- cpu |= X264_CPU_AVX;
+ /* Check OSXSAVE and AVX bits */
+ if( (ecx&0x18000000) == 0x18000000 )
+ {
+ /* Check for OS support: XCR0 bits 1 (SSE state) and 2 (AVX state) must both be enabled */
+ x264_cpu_xgetbv( 0, &eax, &edx );
+ if( (eax&0x6) == 0x6 )
+ cpu |= X264_CPU_AVX;
+ }
if( cpu & X264_CPU_SSSE3 )
cpu |= X264_CPU_SSE2_IS_FAST;
SECTION .text
-%ifdef ARCH_X86_64
-
;-----------------------------------------------------------------------------
-; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
+; void cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
;-----------------------------------------------------------------------------
cglobal cpu_cpuid, 5,7
- push rbx
- mov r11, r1
- mov r10, r2
- movifnidn r9, r3
- movifnidn r8, r4
- mov eax, r0d
+ push rbx ; cpuid overwrites ebx; the caller's value is restored by the final pop rbx
+ push r4 ; the four output pointers are pushed in reverse order...
+ push r3
+ push r2
+ push r1 ; ...so the pops below return them as r1, r2, r3, r4
+ mov eax, r0d ; eax = op, the value cpuid is queried with
cpuid
- mov [r11], eax
- mov [r10], ebx
- mov [r9], ecx
- mov [r8], edx
- pop rbx
+ pop rsi ; saved r1 (presumably the eax pointer, per the signature comment)
+ mov [rsi], eax
+ pop rsi ; saved r2 (presumably the ebx pointer)
+ mov [rsi], ebx
+ pop rsi ; saved r3 (presumably the ecx pointer)
+ mov [rsi], ecx
+ pop rsi ; saved r4 (presumably the edx pointer)
+ mov [rsi], edx
+ pop rbx ; matches the push rbx at the top
+ RET
+
+;-----------------------------------------------------------------------------
+; void cpu_xgetbv( int op, int *eax, int *edx )
+;-----------------------------------------------------------------------------
+cglobal cpu_xgetbv, 3,7
+ push r2 ; save both output pointers on the stack: xgetbv overwrites eax and edx
+ push r1 ; pushed last, so it is popped first below
+ mov ecx, r0d ; ecx = op, selecting the extended control register to read
+ xgetbv ; result is returned in edx:eax
+ pop rsi ; saved r1 (presumably the eax pointer, per the signature comment)
+ mov [rsi], eax
+ pop rsi ; saved r2 (presumably the edx pointer)
+ mov [rsi], edx
RET
-%else
+%ifndef ARCH_X86_64
;-----------------------------------------------------------------------------
; int cpu_cpuid_test( void )
popfd
ret
-;-----------------------------------------------------------------------------
-; int cpu_cpuid( int op, int *eax, int *ebx, int *ecx, int *edx )
-;-----------------------------------------------------------------------------
-cglobal cpu_cpuid, 0,6
- mov eax, r0m
- cpuid
- mov esi, r1m
- mov [esi], eax
- mov esi, r2m
- mov [esi], ebx
- mov esi, r3m
- mov [esi], ecx
- mov esi, r4m
- mov [esi], edx
- RET
-
;-----------------------------------------------------------------------------
; void stack_align( void (*func)(void*), void *arg );
;-----------------------------------------------------------------------------
#define X264_CPU_FAST_NEON_MRC 0x080000 /* Transfer from NEON to ARM register is fast (Cortex-A9) */
#define X264_CPU_SLOW_CTZ 0x100000 /* BSR/BSF x86 instructions are really slow on some CPUs */
#define X264_CPU_SLOW_ATOM 0x200000 /* The Atom just sucks */
-#define X264_CPU_AVX 0x400000 /* AVX support -- we don't currently use YMM registers, just
- * the 3-operand capability, so we don't require OS support
- * for AVX. */
+#define X264_CPU_AVX 0x400000 /* AVX support: requires OS support even if YMM registers
+ * aren't used. */
/* Analyse flags
*/