[X86] AMD znver2 enablement

author Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>

Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)

committer Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>

Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
author Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
committer Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
diff --git a/include/llvm/Support/X86TargetParser.def b/include/llvm/Support/X86TargetParser.def

index fb82228c4dff4c337ceec1f8c0cf85e17bf8fb02..2b5936bb2607be770bef0c574cf2639e8040c0f0 100644 (file)
--- a/include/llvm/Support/X86TargetParser.def
+++ b/include/llvm/Support/X86TargetParser.def
@@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
  X86_CPU_SUBTYPE_COMPAT("cannonlake",     INTEL_COREI7_CANNONLAKE,     "cannonlake")
  X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
  X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
+X86_CPU_SUBTYPE_COMPAT("znver2",         AMDFAM17H_ZNVER2,            "znver2")
  // Entries below this are not in libgcc/compiler-rt.
  X86_CPU_SUBTYPE       ("core2",          INTEL_CORE2_65)
  X86_CPU_SUBTYPE       ("penryn",         INTEL_CORE2_45)
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp

index 35bc97222410963c0cefd691f8ddb31a5865b3d1..52e7080e7446f32cd07488fd45b331ffb7de4221 100644 (file)
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -916,7 +916,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
      break; // "btver2"
    case 23:
      *Type = X86::AMDFAM17H;
-    *Subtype = X86::AMDFAM17H_ZNVER1;
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = X86::AMDFAM17H_ZNVER2;
+      break; // "znver2"; 30h-3Fh: Zen2
+    }
+    if (Model <= 0x0f) {
+      *Subtype = X86::AMDFAM17H_ZNVER1;
+      break; // "znver1"; 00h-0Fh: Zen1
+    }
      break;
    default:
      break; // "generic"
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td

index 502278bd8a55340f732d9a332ae18bdd2696373f..094790023b1035b19b78b833131a02a61ba659c1 100644 (file)
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1143,8 +1143,8 @@ def : Proc<"bdver4", [
    FeatureMacroFusion
  ]>;
  
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
    FeatureADX,
    FeatureAES,
    FeatureAVX2,
@@ -1183,6 +1183,19 @@ def: ProcessorModel<"znver1", Znver1Model, [
    FeatureXSAVEOPT,
    FeatureXSAVES]>;
  
+class Znver1Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+]>;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+  FeatureCLWB,
+  FeatureRDPID,
+  FeatureWBNOINVD
+]>;
+def : Znver2Proc<"znver2">;
+
  def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
  
  def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
diff --git a/test/CodeGen/X86/cpus-amd.ll b/test/CodeGen/X86/cpus-amd.ll

index 5e90048d389690fd0b246351a3c470eb90094d14..c5716d68e636c8ff01353abab956a3e2517ed07d 100644 (file)
--- a/test/CodeGen/X86/cpus-amd.ll
+++ b/test/CodeGen/X86/cpus-amd.ll
@@ -26,6 +26,7 @@
  ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
  ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
  ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
  
  define void @foo() {
    ret void
diff --git a/test/CodeGen/X86/lzcnt-zext-cmp.ll b/test/CodeGen/X86/lzcnt-zext-cmp.ll

index 90abd2d06ba36c26055860208e8d6cf69909fd97..8d43a1b73234c7fc66ef88963c8349f7ace307df 100644 (file)
--- a/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -5,6 +5,8 @@
  ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
  ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
  ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
  
  ; Test one 32-bit input, output is 32-bit, no transformations expected.
  define i32 @test_zext_cmp0(i32 %a) {
diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll

index 54c248f3b044f0ca7e2e32cef310e25b3bdb1591..f2c7c2fa4a5647e2403adcc30c4069f9edb0b8de 100644 (file)
--- a/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -47,6 +47,7 @@
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
  ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefix=FAST
  
  ; Other chips with slow unaligned memory accesses
  
diff --git a/test/CodeGen/X86/x86-64-double-shifts-var.ll b/test/CodeGen/X86/x86-64-double-shifts-var.ll

index 10cb1843f3e0b2a1287d3ebe974b03d82d04f930..513e7774f6a79bda37ae7792556c39119ced4699 100644 (file)
--- a/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ b/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -13,8 +13,9 @@
  ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
  ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
  ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
  
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
  ; double precision shift instructions we do not generate 'shld' or 'shrd'
  ; instructions.
  
@@ -25,7 +26,7 @@
  
  define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
  entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
    %sh_prom = zext i32 %c to i64
    %shl = shl i64 %a, %sh_prom
    %sub = sub nsw i32 64, %c
author	Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
	Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
committer	Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
	Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
include/llvm/Support/X86TargetParser.def		patch \| blob \| history
lib/Support/Host.cpp		patch \| blob \| history
lib/Target/X86/X86.td		patch \| blob \| history
test/CodeGen/X86/cpus-amd.ll		patch \| blob \| history
test/CodeGen/X86/lzcnt-zext-cmp.ll		patch \| blob \| history
test/CodeGen/X86/slow-unaligned-mem.ll		patch \| blob \| history
test/CodeGen/X86/x86-64-double-shifts-var.ll		patch \| blob \| history