From: Peter Johnson Date: Wed, 12 Sep 2007 07:10:26 +0000 (-0000) Subject: Now that we have more cpu feature bits, properly document SSE4a as such. X-Git-Tag: v0.6.2~6^2~14 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f0e2e9a262fef648031da856e57254272613e460;p=yasm Now that we have more cpu feature bits, properly document SSE4a as such. Also correct name of k8 chip (clawhammer) and add venice, k10 processors. svn path=/trunk/yasm/; revision=1946 --- diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py index 41265967..6d3127c3 100755 --- a/modules/arch/x86/gen_x86_insn.py +++ b/modules/arch/x86/gen_x86_insn.py @@ -32,7 +32,7 @@ ordered_cpus = [ "P4", "IA64", "Hammer"] ordered_cpu_features = [ "FPU", "Cyrix", "AMD", "MMX", "3DNow", "SMM", "SSE", "SSE2", - "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42"] + "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a"] unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"] def cpu_lcd(cpu1, cpu2): @@ -4961,18 +4961,18 @@ for sfx, sz in zip("wlq", [16, 32, 64]): add_insn("popcnt", "cnt", modifiers=[0xB8], cpu=["SSE42"]) ##################################################################### -# AMD SSE4.1 instructions +# AMD SSE4a instructions ##################################################################### add_group("extrq", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0x66, opcode=[0x0F, 0x78], operands=[Operand(type="SIMDReg", size=128, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_group("extrq", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0x66, opcode=[0x0F, 0x79], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), @@ -4981,7 +4981,7 @@ add_group("extrq", add_insn("extrq", "extrq") add_group("insertq", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0xF2, opcode=[0x0F, 0x78], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), @@ -4989,7 +4989,7 @@ add_group("insertq", Operand(type="Imm", size=8, relaxed=True, dest="EA"), Operand(type="Imm", size=8, relaxed=True, dest="Imm")]) add_group("insertq", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0xF2, opcode=[0x0F, 0x79], operands=[Operand(type="SIMDReg", size=128, dest="Spare"), @@ -4998,7 +4998,7 @@ add_group("insertq", add_insn("insertq", "insertq") add_group("movntsd", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0xF2, opcode=[0x0F, 0x2B], operands=[Operand(type="Mem", size=64, relaxed=True, dest="EA"), @@ -5007,7 +5007,7 @@ add_group("movntsd", add_insn("movntsd", "movntsd") add_group("movntss", - cpu=["SSE41"], + cpu=["SSE4a"], prefix=0xF3, opcode=[0x0F, 0x2B], operands=[Operand(type="Mem", size=32, relaxed=True, dest="EA"), diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index efd8e74f..5f5be1c0 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -63,6 +63,7 @@ #define CPU_SSSE3 29 /* Streaming SIMD extensions 3 required */ #define CPU_SSE41 30 /* Streaming SIMD extensions 4.1 required */ #define CPU_SSE42 31 /* Streaming SIMD extensions 4.2 required */ +#define CPU_SSE4a 32 /* AMD Streaming SIMD extensions 4a required */ /* Technically not CPU capabilities, they do affect what instructions are * available. These are tested against BITS==64. diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf index 1feae1db..ed2b5232 100644 --- a/modules/arch/x86/x86cpu.gperf +++ b/modules/arch/x86/x86cpu.gperf @@ -118,6 +118,8 @@ x86_cpu_ia64(wordptr cpu, unsigned int data) BitVector_Bit_On(cpu, CPU_086); } +#define PROC_k10 10 +#define PROC_venice 9 #define PROC_hammer 8 #define PROC_k7 7 #define PROC_k6 6 @@ -131,6 +133,10 @@ x86_cpu_amd(wordptr cpu, unsigned int data) BitVector_Bit_On(cpu, CPU_Prot); BitVector_Bit_On(cpu, CPU_SMM); BitVector_Bit_On(cpu, CPU_3DNow); + if (data >= PROC_k10) + BitVector_Bit_On(cpu, CPU_SSE4a); + if (data >= PROC_venice) + BitVector_Bit_On(cpu, CPU_SSE3); if (data >= PROC_hammer) BitVector_Bit_On(cpu, CPU_SSE2); if (data >= PROC_k7) @@ -240,11 +246,14 @@ itanium, x86_cpu_ia64, 0 k6, x86_cpu_amd, PROC_k6 k7, x86_cpu_amd, PROC_k7 athlon, x86_cpu_amd, PROC_k7 +k8, x86_cpu_amd, PROC_hammer hammer, x86_cpu_amd, PROC_hammer -sledgehammer, x86_cpu_amd, PROC_hammer +clawhammer, x86_cpu_amd, PROC_hammer opteron, x86_cpu_amd, PROC_hammer athlon64, x86_cpu_amd, PROC_hammer athlon-64, x86_cpu_amd, PROC_hammer +venice, x86_cpu_amd, PROC_venice +k10, x86_cpu_amd, PROC_k10 prescott, x86_cpu_intel, PROC_prescott conroe, x86_cpu_intel, PROC_conroe penryn, x86_cpu_intel, PROC_penryn @@ -299,8 +308,14 @@ ssse3, x86_cpu_set, CPU_SSSE3 nossse3, x86_cpu_clear, CPU_SSSE3 sse4.1, x86_cpu_set, CPU_SSE41 nosse4.1, x86_cpu_clear, CPU_SSE41 +sse41, x86_cpu_set, CPU_SSE41 +nosse41, x86_cpu_clear, CPU_SSE41 sse4.2, x86_cpu_set, CPU_SSE42 nosse4.2, x86_cpu_clear, CPU_SSE42 +sse42, x86_cpu_set, CPU_SSE42 +nosse42, x86_cpu_clear, CPU_SSE42 +sse4a, x86_cpu_set, CPU_SSE4a +nosse4a, x86_cpu_clear, CPU_SSE4a sse4, x86_cpu_set_sse4, 0 nosse4, x86_cpu_clear_sse4, 0 %%