]> granicus.if.org Git - llvm/commitdiff
[Power9] Add support for -mcpu=pwr9 in the back end
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>
Mon, 9 May 2016 18:54:58 +0000 (18:54 +0000)
committerNemanja Ivanovic <nemanja.i.ibm@gmail.com>
Mon, 9 May 2016 18:54:58 +0000 (18:54 +0000)
This patch corresponds to review:
http://reviews.llvm.org/D19683

Simply adds the bits for being able to specify -mcpu=pwr9 to the back end.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268950 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Support/Host.cpp
lib/Target/PowerPC/PPC.td
lib/Target/PowerPC/PPCAsmPrinter.cpp
lib/Target/PowerPC/PPCHazardRecognizers.cpp
lib/Target/PowerPC/PPCISelLowering.cpp
lib/Target/PowerPC/PPCInstrInfo.cpp
lib/Target/PowerPC/PPCSubtarget.cpp
lib/Target/PowerPC/PPCSubtarget.h
lib/Target/PowerPC/PPCTargetTransformInfo.cpp
test/CodeGen/PowerPC/crypto_bifs.ll

index 247f8fd7d7688f420621baf7d65fd5a6ee239fb3..35c51f77c5ba5220e4a1ffde146aae6ae4aa621a 100644 (file)
@@ -622,6 +622,7 @@ StringRef sys::getHostCPUName() {
     .Case("POWER7", "pwr7")
     .Case("POWER8", "pwr8")
     .Case("POWER8E", "pwr8")
+    .Case("POWER9", "pwr9")
     .Default(generic);
 }
 #elif defined(__linux__) && defined(__arm__)
index f0c2b019884bdce371c2b200dd67924dae2ed69a..c66e855c3e5a3283930c6d69a9bd75759db54415 100644 (file)
@@ -47,6 +47,7 @@ def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
 def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
 def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
 def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
+def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">;
 
 def Feature64Bit     : SubtargetFeature<"64bit","Has64BitSupport", "true",
                                         "Enable 64-bit instructions">;
@@ -200,6 +201,8 @@ def ProcessorFeatures {
       !listconcat(Power7FeatureList, Power8SpecificFeatures);
   list<SubtargetFeature> Power9SpecificFeatures =
       [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+  list<SubtargetFeature> Power9FeatureList =
+      !listconcat(Power8FeatureList, Power9SpecificFeatures);
 }
 
 // Note: Future features to add when support is extended to more
@@ -398,6 +401,8 @@ def : ProcessorModel<"pwr6x", G5Model,
                    FeatureMFTB, DeprecatedDST]>;
 def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
 def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
+// FIXME: Same as P8 until the POWER9 scheduling info is available
+def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>;
 def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>;
 def : ProcessorModel<"ppc64", G5Model,
                   [Directive64, FeatureAltivec,
index ac9d19c01a7eb79805af9e8eb9ce76b6661e8a94..ecf184e4b0dfa612c542d96280d615b8d87a64fb 100644 (file)
@@ -1306,8 +1306,10 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
     "power6",
     "power6x",
     "power7",
+    // FIXME: why is power8 missing here?
     "ppc64",
-    "ppc64le"
+    "ppc64le",
+    "power9"
   };
 
   // Get the numerically largest directive.
index 7234e30fa73e612639df90297904a19490534025..caab67d68b171ea3501fa58d3c0d2f9c40061ace 100644 (file)
@@ -162,8 +162,9 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
     unsigned Directive =
         DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
     // If we're using a special group-terminating nop, then we need only one.
+    // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
     if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
-        Directive == PPC::DIR_PWR8 )
+        Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)
       return 1;
 
     return 5 - CurSlots;
@@ -223,8 +224,10 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
       DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
   // If the group has now filled all of its slots, or if we're using a special
   // group-terminating nop, the group is complete.
+  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
   if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
-      Directive == PPC::DIR_PWR8 || CurSlots == 6)  {
+      Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR8 ||
+      CurSlots == 6) {
     CurGroup.clear();
     CurSlots = CurBranches = 0;
   } else {
index 813bf27f0091de71904a66fc6c8c8e81bd9a6b08..bb6a0c5bbd3b33aac229333b484760fe069068a5 100644 (file)
@@ -916,6 +916,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   case PPC::DIR_PWR6X:
   case PPC::DIR_PWR7:
   case PPC::DIR_PWR8:
+  case PPC::DIR_PWR9:
     setPrefFunctionAlignment(4);
     setPrefLoopAlignment(4);
     break;
@@ -11187,7 +11188,8 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
   case PPC::DIR_PWR6:
   case PPC::DIR_PWR6X:
   case PPC::DIR_PWR7:
-  case PPC::DIR_PWR8: {
+  case PPC::DIR_PWR8:
+  case PPC::DIR_PWR9: {
     if (!ML)
       break;
 
index 204dc28ba25392835455fe6a524b6563e8c1ef26..313ab8846ad2711925ecb55a474e1479b1c68edf 100644 (file)
@@ -93,6 +93,7 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
   unsigned Directive =
       DAG->MF.getSubtarget<PPCSubtarget>().getDarwinDirective();
 
+  // FIXME: Leaving this as-is until we have POWER9 scheduling info
   if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
     return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
 
@@ -181,6 +182,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
     case PPC::DIR_PWR6X:
     case PPC::DIR_PWR7:
     case PPC::DIR_PWR8:
+    // FIXME: Is this needed for POWER9?
       Latency += 2;
       break;
     }
@@ -428,6 +430,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
   case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
   case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
   case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
+  // FIXME: Update when POWER9 scheduling model is ready.
+  case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;
   }
 
   DebugLoc DL;
index 57de0b61b1c42c5e036a51f05d234719e897250d..b87e5a84c039d29d87854c857c40333162532973 100644 (file)
@@ -170,6 +170,8 @@ static bool needsAggressiveScheduling(unsigned Directive) {
   case PPC::DIR_E5500:
   case PPC::DIR_PWR7:
   case PPC::DIR_PWR8:
+  // FIXME: Same as P8 until POWER9 scheduling info is available
+  case PPC::DIR_PWR9:
     return true;
   }
 }
index da7df79709556112d8a74e0561420cd79a5e44d8..55500d3eb4bcab868d31829d1590ea3ce2320418 100644 (file)
@@ -56,6 +56,7 @@ namespace PPC {
     DIR_PWR6X,
     DIR_PWR7,
     DIR_PWR8,
+    DIR_PWR9,
     DIR_64
   };
 }
index 85a684a89292b5565611d7ad20083e8bf498167c..9331e41fb9c1958b2cd7988c982756213c50f705 100644 (file)
@@ -267,8 +267,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
 
   // For P7 and P8, floating-point instructions have a 6-cycle latency and
   // there are two execution units, so unroll by 12x for latency hiding.
-  if (Directive == PPC::DIR_PWR7 ||
-      Directive == PPC::DIR_PWR8)
+  // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
+  if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
+      Directive == PPC::DIR_PWR9)
     return 12;
 
   // For most things, modern systems have two execution units (and
index f58935b85b6665361f5159d38890576735a24d92..62247e8118f48b2a394f402c304fdd8b1fd4d014 100644 (file)
@@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
 ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
 ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; FIXME: The original intent was to add a check-next for the blr after every check.
 ; However, this currently fails since we don't eliminate stores of the unused