granicus.if.org Git - llvm/commitdiff
[X86] AMD znver2 enablement
author: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
committer: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian@amd.com>
Tue, 26 Feb 2019 16:55:10 +0000 (16:55 +0000)
This patch enables the following:

1) AMD family 17h "znver2" tune flag (-march, -mcpu).
2) ISAs that are enabled for "znver2" architecture.
3) For the time being, it uses the znver1 scheduler model.
4) Tests are updated.
5) Scheduler descriptions are yet to be put in place.

Reviewers: craig.topper

Differential Revision: https://reviews.llvm.org/D58343

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354897 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/Support/X86TargetParser.def
lib/Support/Host.cpp
lib/Target/X86/X86.td
test/CodeGen/X86/cpus-amd.ll
test/CodeGen/X86/lzcnt-zext-cmp.ll
test/CodeGen/X86/slow-unaligned-mem.ll
test/CodeGen/X86/x86-64-double-shifts-var.ll

index fb82228c4dff4c337ceec1f8c0cf85e17bf8fb02..2b5936bb2607be770bef0c574cf2639e8040c0f0 100644 (file)
@@ -98,6 +98,7 @@ X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-a
 X86_CPU_SUBTYPE_COMPAT("cannonlake",     INTEL_COREI7_CANNONLAKE,     "cannonlake")
 X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client")
 X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server")
+X86_CPU_SUBTYPE_COMPAT("znver2",         AMDFAM17H_ZNVER2,            "znver2")
 // Entries below this are not in libgcc/compiler-rt.
 X86_CPU_SUBTYPE       ("core2",          INTEL_CORE2_65)
 X86_CPU_SUBTYPE       ("penryn",         INTEL_CORE2_45)
index 35bc97222410963c0cefd691f8ddb31a5865b3d1..52e7080e7446f32cd07488fd45b331ffb7de4221 100644 (file)
@@ -916,7 +916,14 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
     break; // "btver2"
   case 23:
     *Type = X86::AMDFAM17H;
-    *Subtype = X86::AMDFAM17H_ZNVER1;
+    if (Model >= 0x30 && Model <= 0x3f) {
+      *Subtype = X86::AMDFAM17H_ZNVER2;
+      break; // "znver2"; 30h-3fh: Zen2
+    }
+    if (Model <= 0x0f) {
+      *Subtype = X86::AMDFAM17H_ZNVER1;
+      break; // "znver1"; 00h-0Fh: Zen1
+    }
     break;
   default:
     break; // "generic"
index 502278bd8a55340f732d9a332ae18bdd2696373f..094790023b1035b19b78b833131a02a61ba659c1 100644 (file)
@@ -1143,8 +1143,8 @@ def : Proc<"bdver4", [
   FeatureMacroFusion
 ]>;
 
-// Znver1
-def: ProcessorModel<"znver1", Znver1Model, [
+// AMD Zen Processors common ISAs
+def ZNFeatures : ProcessorFeatures<[], [
   FeatureADX,
   FeatureAES,
   FeatureAVX2,
@@ -1183,6 +1183,19 @@ def: ProcessorModel<"znver1", Znver1Model, [
   FeatureXSAVEOPT,
   FeatureXSAVES]>;
 
+class Znver1Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+]>;
+def : Znver1Proc<"znver1">;
+
+class Znver2Proc<string Name> : ProcModel<Name, Znver1Model,
+                                         ZNFeatures.Value, [
+  FeatureCLWB,
+  FeatureRDPID,
+  FeatureWBNOINVD
+]>;
+def : Znver2Proc<"znver2">;
+
 def : Proc<"geode",           [FeatureX87, FeatureSlowUAMem16, Feature3DNowA]>;
 
 def : Proc<"winchip-c6",      [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>;
index 5e90048d389690fd0b246351a3c470eb90094d14..c5716d68e636c8ff01353abab956a3e2517ed07d 100644 (file)
@@ -26,6 +26,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void
index 90abd2d06ba36c26055860208e8d6cf69909fd97..8d43a1b73234c7fc66ef88963c8349f7ace307df 100644 (file)
@@ -5,6 +5,8 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s
 
 ; Test one 32-bit input, output is 32-bit, no transformations expected.
 define i32 @test_zext_cmp0(i32 %a) {
index 54c248f3b044f0ca7e2e32cef310e25b3bdb1591..f2c7c2fa4a5647e2403adcc30c4069f9edb0b8de 100644 (file)
@@ -47,6 +47,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 
index 10cb1843f3e0b2a1287d3ebe974b03d82d04f930..513e7774f6a79bda37ae7792556c39119ced4699 100644 (file)
@@ -13,8 +13,9 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver3 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=bdver4 | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
 
-; Verify that for the X86_64 processors that are known to have poor latency 
+; Verify that for the X86_64 processors that are known to have poor latency
 ; double precision shift instructions we do not generate 'shld' or 'shrd'
 ; instructions.
 
@@ -25,7 +26,7 @@
 
 define i64 @lshift(i64 %a, i64 %b, i32 %c) nounwind readnone {
 entry:
-; CHECK-NOT: shld 
+; CHECK-NOT: shld
   %sh_prom = zext i32 %c to i64
   %shl = shl i64 %a, %sh_prom
   %sub = sub nsw i32 64, %c