From 102324c09089f43452385e35362fde8755bdf54c Mon Sep 17 00:00:00 2001 From: Peter Johnson Date: Mon, 2 May 2005 00:49:55 +0000 Subject: [PATCH] Add SSE3 (PNI) instruction support. * x86id.re (yasm_x86__parse_cpu): Add cases for "PRESCOTT", "SSE3", and "PNI". (yasm_x86__parse_check_id): Add cases for all 13 new instructions. Update FILD and FISTP so that common fildstp_insn can be used. (lddqu_insn): New instruction format. * x86arch.h (CPU_SSE3): New. Renumber others. * sse3.asm, sse3.hex, sse3.errwarn: New test case for SSE3 instructions. * x86/tests/Makefile.inc: Include test in build. svn path=/trunk/yasm/; revision=1204 --- modules/arch/x86/tests/Makefile.inc | 3 + modules/arch/x86/tests/sse3.asm | 25 ++++++ modules/arch/x86/tests/sse3.errwarn | 0 modules/arch/x86/tests/sse3.hex | 118 ++++++++++++++++++++++++++++ modules/arch/x86/x86arch.h | 21 ++--- modules/arch/x86/x86id.re | 41 ++++++++-- 6 files changed, 193 insertions(+), 15 deletions(-) create mode 100644 modules/arch/x86/tests/sse3.asm create mode 100644 modules/arch/x86/tests/sse3.errwarn create mode 100644 modules/arch/x86/tests/sse3.hex diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc index c67cad39..7d3cabca 100644 --- a/modules/arch/x86/tests/Makefile.inc +++ b/modules/arch/x86/tests/Makefile.inc @@ -127,6 +127,9 @@ EXTRA_DIST += modules/arch/x86/tests/simd-2.hex EXTRA_DIST += modules/arch/x86/tests/simd64-1.asm EXTRA_DIST += modules/arch/x86/tests/simd64-1.errwarn EXTRA_DIST += modules/arch/x86/tests/simd64-1.hex +EXTRA_DIST += modules/arch/x86/tests/sse3.asm +EXTRA_DIST += modules/arch/x86/tests/sse3.errwarn +EXTRA_DIST += modules/arch/x86/tests/sse3.hex EXTRA_DIST += modules/arch/x86/tests/stos.asm EXTRA_DIST += modules/arch/x86/tests/stos.errwarn EXTRA_DIST += modules/arch/x86/tests/stos.hex diff --git a/modules/arch/x86/tests/sse3.asm b/modules/arch/x86/tests/sse3.asm new file mode 100644 index 00000000..bf53ab00 --- /dev/null +++ 
b/modules/arch/x86/tests/sse3.asm @@ -0,0 +1,25 @@ +[bits 32] +addsubpd xmm5, xmm7 +addsubpd xmm0, [eax] +addsubps xmm1, xmm5 +addsubps xmm3, dqword [edx] +fisttp word [0] +fisttp dword [4] +fisttp qword [8] +haddpd xmm2, xmm4 +haddpd xmm7, [ecx+4] +haddps xmm6, xmm1 +haddps xmm0, dqword [0] +hsubpd xmm5, xmm3 +hsubpd xmm1, [ebp] +hsubps xmm4, xmm1 +hsubps xmm2, [esp] +lddqu xmm3, [ecx+edx*4+8] +monitor +movddup xmm7, xmm6 +movddup xmm1, qword [4] +movshdup xmm3, xmm4 +movshdup xmm2, [0] +movsldup xmm0, xmm7 +movsldup xmm5, dqword [eax+ebx] +mwait diff --git a/modules/arch/x86/tests/sse3.errwarn b/modules/arch/x86/tests/sse3.errwarn new file mode 100644 index 00000000..e69de29b diff --git a/modules/arch/x86/tests/sse3.hex b/modules/arch/x86/tests/sse3.hex new file mode 100644 index 00000000..86014e61 --- /dev/null +++ b/modules/arch/x86/tests/sse3.hex @@ -0,0 +1,118 @@ +66 +0f +d0 +ef +66 +0f +d0 +00 +f2 +0f +d0 +cd +f2 +0f +d0 +1a +df +0d +00 +00 +00 +00 +db +0d +04 +00 +00 +00 +dd +0d +08 +00 +00 +00 +66 +0f +7c +d4 +66 +0f +7c +79 +04 +f2 +0f +7c +f1 +f2 +0f +7c +05 +00 +00 +00 +00 +66 +0f +7d +eb +66 +0f +7d +4d +00 +f2 +0f +7d +e1 +f2 +0f +7d +14 +24 +f2 +0f +f0 +5c +91 +08 +0f +01 +c8 +f2 +0f +12 +fe +f2 +0f +12 +0d +04 +00 +00 +00 +f3 +0f +16 +dc +f3 +0f +16 +15 +00 +00 +00 +00 +f3 +0f +12 +c7 +f3 +0f +12 +2c +18 +0f +01 +c9 diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h index b89b3b19..b5533327 100644 --- a/modules/arch/x86/x86arch.h +++ b/modules/arch/x86/x86arch.h @@ -46,20 +46,21 @@ #define CPU_MMX (1UL<<13) /* MMX support required */ #define CPU_SSE (1UL<<14) /* Streaming SIMD extensions required */ #define CPU_SSE2 (1UL<<15) /* Streaming SIMD extensions 2 required */ -#define CPU_3DNow (1UL<<16) /* 3DNow! support required */ -#define CPU_Cyrix (1UL<<17) /* Cyrix-specific instruction */ -#define CPU_AMD (1UL<<18) /* AMD-specific inst. 
(older than K6) */ -#define CPU_SMM (1UL<<19) /* System Management Mode instruction */ -#define CPU_Prot (1UL<<20) /* Protected mode only instruction */ -#define CPU_Undoc (1UL<<21) /* Undocumented instruction */ -#define CPU_Obs (1UL<<22) /* Obsolete instruction */ -#define CPU_Priv (1UL<<23) /* Priveleged instruction */ +#define CPU_SSE3 (1UL<<16) /* Streaming SIMD extensions 3 required */ +#define CPU_3DNow (1UL<<17) /* 3DNow! support required */ +#define CPU_Cyrix (1UL<<18) /* Cyrix-specific instruction */ +#define CPU_AMD (1UL<<19) /* AMD-specific inst. (older than K6) */ +#define CPU_SMM (1UL<<20) /* System Management Mode instruction */ +#define CPU_Prot (1UL<<21) /* Protected mode only instruction */ +#define CPU_Undoc (1UL<<22) /* Undocumented instruction */ +#define CPU_Obs (1UL<<23) /* Obsolete instruction */ +#define CPU_Priv (1UL<<24) /* Privileged instruction */ /* Technically not CPU capabilities, they do affect what instructions are * available. These are tested against BITS==64. 
*/ -#define CPU_64 (1UL<<24) /* Only available in 64-bit mode */ -#define CPU_Not64 (1UL<<25) /* Not available (invalid) in 64-bit mode */ +#define CPU_64 (1UL<<25) /* Only available in 64-bit mode */ +#define CPU_Not64 (1UL<<26) /* Not available (invalid) in 64-bit mode */ typedef struct yasm_arch_x86 { yasm_arch_base arch; /* base structure */ diff --git a/modules/arch/x86/x86id.re b/modules/arch/x86/x86id.re index 60dd1dba..86a7ed96 100644 --- a/modules/arch/x86/x86id.re +++ b/modules/arch/x86/x86id.re @@ -1197,7 +1197,7 @@ static const x86_insn_info fildstp_insn[] = { {OPT_Mem|OPS_16|OPA_EA, 0, 0} }, { CPU_FPU, MOD_SpAdd, 0, 0, 0, 1, {0xDB, 0, 0}, 0, 1, {OPT_Mem|OPS_32|OPA_EA, 0, 0} }, - { CPU_FPU, MOD_Gap0|MOD_SpAdd, 0, 0, 0, 1, {0xDF, 0, 0}, 0, 1, + { CPU_FPU, MOD_Gap0|MOD_Op0Add|MOD_SpAdd, 0, 0, 0, 1, {0xDD, 0, 0}, 0, 1, {OPT_Mem|OPS_64|OPA_EA, 0, 0} } }; static const x86_insn_info fbldstp_insn[] = { @@ -1643,6 +1643,12 @@ static const x86_insn_info pslrldq_insn[] = { {OPT_SIMDReg|OPS_128|OPA_EA, OPT_Imm|OPS_8|OPS_Relaxed|OPA_Imm, 0} } }; +/* SSE3 instructions */ +static const x86_insn_info lddqu_insn[] = { + { CPU_SSE3, 0, 0, 0, 0xF2, 2, {0x0F, 0xF0, 0}, 0, 2, + {OPT_SIMDReg|OPS_128|OPA_Spare, OPT_Mem|OPS_Any|OPA_EA, 0} } +}; + /* AMD 3DNow! instructions */ static const x86_insn_info now3d_insn[] = { { CPU_3DNow, MOD_Imm8, 0, 0, 0, 2, {0x0F, 0x0F, 0}, 0, 2, @@ -2553,8 +2559,15 @@ yasm_x86__parse_cpu(yasm_arch *arch, const char *id, unsigned long line) (A T H L O N "-"? 
"64") { arch_x86->cpu_enabled = CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|CPU_K6| - CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|CPU_3DNow| - CPU_SMM|CPU_Prot|CPU_Priv; + CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2| + CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv; + return; + } + P R E S C O T T { + arch_x86->cpu_enabled = + CPU_186|CPU_286|CPU_386|CPU_486|CPU_586|CPU_686|CPU_K6| + CPU_Athlon|CPU_Hammer|CPU_FPU|CPU_MMX|CPU_SSE|CPU_SSE2| + CPU_SSE3|CPU_3DNow|CPU_SMM|CPU_Prot|CPU_Priv; return; } @@ -2569,6 +2582,10 @@ yasm_x86__parse_cpu(yasm_arch *arch, const char *id, unsigned long line) N O S S E { arch_x86->cpu_enabled &= ~CPU_SSE; return; } S S E "2" { arch_x86->cpu_enabled |= CPU_SSE2; return; } N O S S E "2" { arch_x86->cpu_enabled &= ~CPU_SSE2; return; } + S S E "3" { arch_x86->cpu_enabled |= CPU_SSE3; return; } + N O S S E "3" { arch_x86->cpu_enabled &= ~CPU_SSE3; return; } + P N I { arch_x86->cpu_enabled |= CPU_SSE3; return; } + N O P N I { arch_x86->cpu_enabled &= ~CPU_SSE3; return; } "3" D N O W { arch_x86->cpu_enabled |= CPU_3DNow; return; } N O "3" D N O W { arch_x86->cpu_enabled &= ~CPU_3DNow; return; } C Y R I X { arch_x86->cpu_enabled |= CPU_Cyrix; return; } @@ -3454,12 +3471,12 @@ yasm_x86__parse_check_id(yasm_arch *arch, unsigned long data[4], V E R W { RET_INSN(prot286, 0x0500, CPU_286|CPU_Prot); } /* Floating point instructions */ F L D { RET_INSN(fldstp, 0x0500C0, CPU_FPU); } - F I L D { RET_INSN(fildstp, 0x0500, CPU_FPU); } + F I L D { RET_INSN(fildstp, 0x050200, CPU_FPU); } F B L D { RET_INSN(fbldstp, 0x04, CPU_FPU); } F S T { RET_INSN(fst, 0, CPU_FPU); } F I S T { RET_INSN(fiarith, 0x02DB, CPU_FPU); } F S T P { RET_INSN(fldstp, 0x0703D8, CPU_FPU); } - F I S T P { RET_INSN(fildstp, 0x0703, CPU_FPU); } + F I S T P { RET_INSN(fildstp, 0x070203, CPU_FPU); } F B S T P { RET_INSN(fbldstp, 0x06, CPU_FPU); } F X C H { RET_INSN(fxch, 0, CPU_FPU); } F C O M { RET_INSN(fcom, 0x02D0, CPU_FPU); } @@ -3830,6 +3847,20 @@ 
yasm_x86__parse_check_id(yasm_arch *arch, unsigned long data[4], P S R L D Q { RET_INSN(pslrldq, 0x03, CPU_SSE2); } P U N P C K H Q D Q { RET_INSN(ssess, 0x666D, CPU_SSE2); } P U N P C K L Q D Q { RET_INSN(ssess, 0x666C, CPU_SSE2); } + /* SSE3 / PNI (Prescott New Instructions) instructions */ + A D D S U B P D { RET_INSN(ssess, 0x66D0, CPU_SSE3); } + A D D S U B P S { RET_INSN(ssess, 0xF2D0, CPU_SSE3); } + F I S T T P { RET_INSN(fildstp, 0x010001, CPU_SSE3); } + H A D D P D { RET_INSN(ssess, 0x667C, CPU_SSE3); } + H A D D P S { RET_INSN(ssess, 0xF27C, CPU_SSE3); } + H S U B P D { RET_INSN(ssess, 0x667D, CPU_SSE3); } + H S U B P S { RET_INSN(ssess, 0xF27D, CPU_SSE3); } + L D D Q U { RET_INSN(lddqu, 0, CPU_SSE3); } + M O N I T O R { RET_INSN(threebyte, 0x0F01C8, CPU_SSE3); } + M O V D D U P { RET_INSN(cvt_xmm_xmm64_ss, 0xF212, CPU_SSE3); } + M O V S H D U P { RET_INSN(ssess, 0xF316, CPU_SSE3); } + M O V S L D U P { RET_INSN(ssess, 0xF312, CPU_SSE3); } + M W A I T { RET_INSN(threebyte, 0x0F01C9, CPU_SSE3); } /* AMD 3DNow! instructions */ P R E F E T C H { RET_INSN(twobytemem, 0x000F0D, CPU_3DNow); } P R E F E T C H W { RET_INSN(twobytemem, 0x010F0D, CPU_3DNow); } -- 2.40.0