platforms when passing invalid arguments to certain methods in the TurboJPEG
Java API.
+2. Fixed a regression in the SIMD feature detection code, introduced by
+the AVX2 SIMD extensions (2.0 beta1[1]), that was known to cause an illegal
+instruction exception, in rare cases, on CPUs that lack support for CPUID leaf
+07H (or on which the maximum CPUID leaf has been limited by way of a BIOS
+setting).
+
2.0.2
=====
xor eax, edx
jz near .return ; CPUID is not supported
- ; Check for MMX instruction support
+ ; Check whether CPUID leaf 07H is supported
+ ; (leaf 07H is used to check for AVX2 instruction support)
xor eax, eax
cpuid
test eax, eax
jz near .return
-
- xor eax, eax
- inc eax
- cpuid
- mov eax, edx ; eax = Standard feature flags
-
- test eax, 1<<23 ; bit23:MMX
- jz short .no_mmx
- or edi, byte JSIMD_MMX
-.no_mmx:
- test eax, 1<<25 ; bit25:SSE
- jz short .no_sse
- or edi, byte JSIMD_SSE
-.no_sse:
- test eax, 1<<26 ; bit26:SSE2
- jz short .no_sse2
- or edi, byte JSIMD_SSE2
-.no_sse2:
+ cmp eax, 7
+ jl short .no_avx2 ; Maximum leaf < 07H
; Check for AVX2 instruction support
mov eax, 7
or edi, JSIMD_AVX2
.no_avx2:
+ ; Check CPUID leaf 01H for MMX, SSE, and SSE2 support
+ xor eax, eax
+ inc eax
+ cpuid
+ mov eax, edx ; eax = Standard feature flags
+
+ ; Check for MMX instruction support
+ test eax, 1<<23 ; bit23:MMX
+ jz short .no_mmx
+ or edi, byte JSIMD_MMX
+.no_mmx:
+ test eax, 1<<25 ; bit25:SSE
+ jz short .no_sse
+ or edi, byte JSIMD_SSE
+.no_sse:
+ test eax, 1<<26 ; bit26:SSE2
+ jz short .no_sse2
+ or edi, byte JSIMD_SSE2
+.no_sse2:
+
; Check for 3DNow! instruction support
mov eax, 0x80000000
cpuid
xor rdi, rdi ; simd support flag
+ ; Assume that all x86-64 processors support SSE & SSE2 instructions
+ or rdi, JSIMD_SSE2
+ or rdi, JSIMD_SSE
+
+ ; Check whether CPUID leaf 07H is supported
+ ; (leaf 07H is used to check for AVX2 instruction support)
+ mov rax, 0
+ cpuid
+ cmp rax, 7
+ jl short .return ; Maximum leaf < 07H
+
; Check for AVX2 instruction support
mov rax, 7
xor rcx, rcx
cpuid
mov rax, rbx ; rax = Extended feature flags
- or rdi, JSIMD_SSE2
- or rdi, JSIMD_SSE
test rax, 1<<5 ; bit5:AVX2
jz short .return