Add Intel post-32nm processor instructions (section 7 of AVX spec).

author Peter Johnson <peter@tortall.net>
Sat, 3 Jul 2010 02:29:22 +0000 (02:29 -0000)
committer Peter Johnson <peter@tortall.net>
Sat, 3 Jul 2010 02:29:22 +0000 (02:29 -0000)
diff --git a/modules/arch/x86/gen_x86_insn.py b/modules/arch/x86/gen_x86_insn.py

index 1ca1bc10af1ba60379362ff242a2821715690194..a260e503f7aac797b154c1ffbcb7bba226a13cc9 100755 (executable)
--- a/modules/arch/x86/gen_x86_insn.py
+++ b/modules/arch/x86/gen_x86_insn.py
@@ -42,7 +42,8 @@ ordered_cpus = [
  ordered_cpu_features = [
      "FPU", "Cyrix", "AMD", "MMX", "3DNow", "SMM", "SSE", "SSE2",
      "SSE3", "SVM", "PadLock", "SSSE3", "SSE41", "SSE42", "SSE4a", "SSE5",
-    "AVX", "FMA", "AES", "CLMUL", "MOVBE", "XOP", "FMA4"]
+    "AVX", "FMA", "AES", "CLMUL", "MOVBE", "XOP", "FMA4", "F16C",
+    "FSGSBASE", "RDRND"]
  unordered_cpu_features = ["Priv", "Prot", "Undoc", "Obs"]
  
  # Predefined VEX prefix field values
@@ -6721,6 +6722,122 @@ for comb, combval in zip(["lql","hql","lqh","hqh"], [0x00,0x01,0x10,0x11]):
      add_insn("vpclmul"+comb+"qdq", "pclmulqdq_fixed",
               modifiers=[combval, VEXL0], avx=True)
  
+#####################################################################
+# AVX Post-32nm instructions
+#####################################################################
+
+# RDRND
+add_group("rdrand",
+    cpu=["RDRND"],
+    opersize=16,
+    opcode=[0x0F, 0xC7],
+    spare=6,
+    operands=[Operand(type="Reg", size=16, dest="EA")])
+add_group("rdrand",
+    #suffix="l",
+    cpu=["RDRND"],
+    opersize=32,
+    opcode=[0x0F, 0xC7],
+    spare=6,
+    operands=[Operand(type="Reg", size=32, dest="EA")])
+add_group("rdrand",
+    cpu=["RDRND"],
+    opersize=64,
+    opcode=[0x0F, 0xC7],
+    spare=6,
+    operands=[Operand(type="Reg", size=64, dest="EA")])
+add_insn("rdrand", "rdrand")
+
+# FSGSBASE instructions
+add_group("fs_gs_base",
+    only64=True,
+    cpu=["FSGSBASE"],
+    modifiers=['SpAdd'],
+    opersize=32,
+    prefix=0xF3,
+    opcode=[0x0F, 0xAE],
+    operands=[Operand(type="Reg", size=32, dest="EA")])
+add_group("fs_gs_base",
+    only64=True,
+    cpu=["FSGSBASE"],
+    opersize=64,
+    modifiers=['SpAdd'],
+    prefix=0xF3,
+    opcode=[0x0F, 0xAE],
+    operands=[Operand(type="Reg", size=64, dest="EA")])
+
+add_insn("rdfsbase", "fs_gs_base", modifiers=[0], only64=True)
+add_insn("rdgsbase", "fs_gs_base", modifiers=[1], only64=True)
+add_insn("wrfsbase", "fs_gs_base", modifiers=[2], only64=True)
+add_insn("wrgsbase", "fs_gs_base", modifiers=[3], only64=True)
+
+# Float-16 conversion instructions
+for g in ['ps2ph', 'ph2ps']:
+    operands1=[]
+    operands1.append(Operand(type="SIMDReg", size=128, dest="EA"))
+    operands1.append(Operand(type="SIMDReg", size=128, dest="Spare"))
+
+    operands2=[]
+    operands2.append(Operand(type="Mem", size=64, dest="EA"))
+    operands2.append(Operand(type="SIMDReg", size=128, dest="Spare"))
+
+    operands3=[]
+    operands3.append(Operand(type="SIMDReg", size=128, dest="EA"))
+    operands3.append(Operand(type="SIMDReg", size=256, dest="Spare"))
+
+    operands4=[]
+    operands4.append(Operand(type="Mem", size=128, dest="EA"))
+    operands4.append(Operand(type="SIMDReg", size=256, dest="Spare"))
+
+    if g == 'ph2ps':
+        operands1.reverse()
+        operands2.reverse()
+        operands3.reverse()
+        operands4.reverse()
+        map = 0x38
+    elif g == 'ps2ph':
+        immop = Operand(type="Imm", size=8, relaxed=True, dest="Imm")
+        operands1.append(immop)
+        operands2.append(immop)
+        operands3.append(immop)
+        operands4.append(immop)
+        map = 0x3A
+
+    add_group("avx_cvt" + g,
+        cpu=["F16C", "AVX"],
+        modifiers=["PreAdd", "Op2Add"],
+        vex=128,
+        prefix=0x00,
+        opcode=[0x0F, map, 0x00],
+        operands=operands1)
+
+    add_group("avx_cvt" + g,
+        cpu=["F16C", "AVX"],
+        modifiers=["PreAdd", "Op2Add"],
+        vex=128,
+        prefix=0x00,
+        opcode=[0x0F, map, 0x00],
+        operands=operands2)
+
+    add_group("avx_cvt" + g,
+        cpu=["F16C", "AVX"],
+        modifiers=["PreAdd", "Op2Add"],
+        vex=256,
+        prefix=0x00,
+        opcode=[0x0F, map, 0x00],
+        operands=operands3)
+
+    add_group("avx_cvt" + g,
+        cpu=["F16C", "AVX"],
+        modifiers=["PreAdd", "Op2Add"],
+        vex=256,
+        prefix=0x00,
+        opcode=[0x0F, map, 0x00],
+        operands=operands4)
+
+add_insn("vcvtps2ph", "avx_cvtps2ph", modifiers=[0x66, 0x1D], avx=True)
+add_insn("vcvtph2ps", "avx_cvtph2ps", modifiers=[0x66, 0x13], avx=True)
+
  #####################################################################
  # AMD SSE4a instructions
  #####################################################################
diff --git a/modules/arch/x86/tests/Makefile.inc b/modules/arch/x86/tests/Makefile.inc

index d6cef7b45797b157f15658107713a876cc058333..b99a52f9d35c10387e979a847e2d7d9a44d00deb 100644 (file)
--- a/modules/arch/x86/tests/Makefile.inc
+++ b/modules/arch/x86/tests/Makefile.inc
@@ -56,6 +56,8 @@ EXTRA_DIST += modules/arch/x86/tests/effaddr.hex
  EXTRA_DIST += modules/arch/x86/tests/enter.asm
  EXTRA_DIST += modules/arch/x86/tests/enter.errwarn
  EXTRA_DIST += modules/arch/x86/tests/enter.hex
+EXTRA_DIST += modules/arch/x86/tests/f16c.asm
+EXTRA_DIST += modules/arch/x86/tests/f16c.hex
  EXTRA_DIST += modules/arch/x86/tests/far64.asm
  EXTRA_DIST += modules/arch/x86/tests/far64.hex
  EXTRA_DIST += modules/arch/x86/tests/farbasic.asm
@@ -66,6 +68,8 @@ EXTRA_DIST += modules/arch/x86/tests/fcmov.asm
  EXTRA_DIST += modules/arch/x86/tests/fcmov.hex
  EXTRA_DIST += modules/arch/x86/tests/fma.asm
  EXTRA_DIST += modules/arch/x86/tests/fma.hex
+EXTRA_DIST += modules/arch/x86/tests/fsgsbase.asm
+EXTRA_DIST += modules/arch/x86/tests/fsgsbase.hex
  EXTRA_DIST += modules/arch/x86/tests/fwdequ64.asm
  EXTRA_DIST += modules/arch/x86/tests/fwdequ64.hex
  EXTRA_DIST += modules/arch/x86/tests/genopcode.asm
@@ -149,6 +153,8 @@ EXTRA_DIST += modules/arch/x86/tests/pushf-err.errwarn
  EXTRA_DIST += modules/arch/x86/tests/pushnosize.asm
  EXTRA_DIST += modules/arch/x86/tests/pushnosize.errwarn
  EXTRA_DIST += modules/arch/x86/tests/pushnosize.hex
+EXTRA_DIST += modules/arch/x86/tests/rdrnd.asm
+EXTRA_DIST += modules/arch/x86/tests/rdrnd.hex
  EXTRA_DIST += modules/arch/x86/tests/rep.asm
  EXTRA_DIST += modules/arch/x86/tests/rep.hex
  EXTRA_DIST += modules/arch/x86/tests/ret.asm
diff --git a/modules/arch/x86/tests/f16c.asm b/modules/arch/x86/tests/f16c.asm

new file mode 100644 (file)

index 0000000..06fa5f1
--- /dev/null
+++ b/modules/arch/x86/tests/f16c.asm
@@ -0,0 +1,10 @@
+[bits 64]
+vcvtph2ps ymm1, xmm2
+vcvtph2ps ymm1, oword [0]
+vcvtph2ps xmm1, xmm2
+vcvtph2ps xmm1, qword [0]
+
+vcvtps2ph xmm1, ymm2, 4
+vcvtps2ph oword [0], ymm2, 8
+vcvtps2ph xmm1, xmm2, 3
+vcvtps2ph qword [0], xmm2, 5
diff --git a/modules/arch/x86/tests/f16c.hex b/modules/arch/x86/tests/f16c.hex

new file mode 100644 (file)

index 0000000..8ba06fc
--- /dev/null
+++ b/modules/arch/x86/tests/f16c.hex
@@ -0,0 +1,64 @@
+c4 
+e2 
+7d 
+13 
+ca 
+c4 
+e2 
+7d 
+13 
+0c 
+25 
+00 
+00 
+00 
+00 
+c4 
+e2 
+79 
+13 
+ca 
+c4 
+e2 
+79 
+13 
+0c 
+25 
+00 
+00 
+00 
+00 
+c4 
+e3 
+7d 
+1d 
+d1 
+04 
+c4 
+e3 
+7d 
+1d 
+14 
+25 
+00 
+00 
+00 
+00 
+08 
+c4 
+e3 
+79 
+1d 
+d1 
+03 
+c4 
+e3 
+79 
+1d 
+14 
+25 
+00 
+00 
+00 
+00 
+05 
diff --git a/modules/arch/x86/tests/fsgsbase.asm b/modules/arch/x86/tests/fsgsbase.asm

new file mode 100644 (file)

index 0000000..bae7301
--- /dev/null
+++ b/modules/arch/x86/tests/fsgsbase.asm
@@ -0,0 +1,10 @@
+[bits 64]
+rdfsbase ebx
+rdfsbase rbx
+rdgsbase ecx
+rdgsbase rcx
+
+wrfsbase ebx
+wrfsbase rbx
+wrgsbase ecx
+wrgsbase rcx
diff --git a/modules/arch/x86/tests/fsgsbase.hex b/modules/arch/x86/tests/fsgsbase.hex

new file mode 100644 (file)

index 0000000..27b5eee
--- /dev/null
+++ b/modules/arch/x86/tests/fsgsbase.hex
@@ -0,0 +1,36 @@
+f3 
+0f 
+ae 
+c3 
+f3 
+48 
+0f 
+ae 
+c3 
+f3 
+0f 
+ae 
+c9 
+f3 
+48 
+0f 
+ae 
+c9 
+f3 
+0f 
+ae 
+d3 
+f3 
+48 
+0f 
+ae 
+d3 
+f3 
+0f 
+ae 
+d9 
+f3 
+48 
+0f 
+ae 
+d9 
diff --git a/modules/arch/x86/tests/rdrnd.asm b/modules/arch/x86/tests/rdrnd.asm

new file mode 100644 (file)

index 0000000..234451d
--- /dev/null
+++ b/modules/arch/x86/tests/rdrnd.asm
@@ -0,0 +1,4 @@
+[bits 64]
+rdrand cx
+rdrand ecx
+rdrand rcx
diff --git a/modules/arch/x86/tests/rdrnd.hex b/modules/arch/x86/tests/rdrnd.hex

new file mode 100644 (file)

index 0000000..be4bb80
--- /dev/null
+++ b/modules/arch/x86/tests/rdrnd.hex
@@ -0,0 +1,11 @@
+66 
+0f 
+c7 
+f1 
+0f 
+c7 
+f1 
+48 
+0f 
+c7 
+f1 
diff --git a/modules/arch/x86/x86arch.h b/modules/arch/x86/x86arch.h

index 1f324671aae2aafecf5f32a82570b8151f67554c..f66ff359d8125e052cee81699eb54674b4652523 100644 (file)
--- a/modules/arch/x86/x86arch.h
+++ b/modules/arch/x86/x86arch.h
@@ -72,6 +72,9 @@
  #define CPU_MOVBE   38      /* MOVBE instruction */
  #define CPU_XOP     39      /* AMD XOP extensions */
  #define CPU_FMA4    40      /* AMD Fused-Multiply-Add extensions */
+#define CPU_F16C    41      /* Intel float-16 instructions */
+#define CPU_FSGSBASE 42     /* Intel FSGSBASE instructions */
+#define CPU_RDRND   43      /* Intel RDRND instruction */
  
  enum x86_parser_type {
      X86_PARSER_NASM = 0,
diff --git a/modules/arch/x86/x86cpu.gperf b/modules/arch/x86/x86cpu.gperf

index 5d7534333dc6a9245c87ef598d8984be6ecfe072..5d03e57ccac984b5ba0671c0faeaf22d9b1b3357 100644 (file)
--- a/modules/arch/x86/x86cpu.gperf
+++ b/modules/arch/x86/x86cpu.gperf
@@ -377,6 +377,12 @@ xop,               x86_cpu_set,    CPU_XOP
  noxop,         x86_cpu_clear,  CPU_XOP
  fma4,          x86_cpu_set,    CPU_FMA4
  nofma4,                x86_cpu_clear,  CPU_FMA4
+f16c,          x86_cpu_set,    CPU_F16C
+nof16c,                x86_cpu_clear,  CPU_F16C
+fsgsbase,      x86_cpu_set,    CPU_FSGSBASE
+nofsgsbase,    x86_cpu_clear,  CPU_FSGSBASE
+rdrnd,         x86_cpu_set,    CPU_RDRND
+nordrnd,       x86_cpu_clear,  CPU_RDRND
  # Change NOP patterns
  basicnop,      x86_nop,        X86_NOP_BASIC
  intelnop,      x86_nop,        X86_NOP_INTEL
author	Peter Johnson <peter@tortall.net>
	Sat, 3 Jul 2010 02:29:22 +0000 (02:29 -0000)
committer	Peter Johnson <peter@tortall.net>
	Sat, 3 Jul 2010 02:29:22 +0000 (02:29 -0000)
modules/arch/x86/gen_x86_insn.py		patch | blob | history
modules/arch/x86/tests/Makefile.inc		patch | blob | history
modules/arch/x86/tests/f16c.asm	[new file with mode: 0644]	patch | blob
modules/arch/x86/tests/f16c.hex	[new file with mode: 0644]	patch | blob
modules/arch/x86/tests/fsgsbase.asm	[new file with mode: 0644]	patch | blob
modules/arch/x86/tests/fsgsbase.hex	[new file with mode: 0644]	patch | blob
modules/arch/x86/tests/rdrnd.asm	[new file with mode: 0644]	patch | blob
modules/arch/x86/tests/rdrnd.hex	[new file with mode: 0644]	patch | blob
modules/arch/x86/x86arch.h		patch | blob | history
modules/arch/x86/x86cpu.gperf		patch | blob | history