From f9018a1eb743bb1e7419a7a772bc7bc8b5522ec7 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 9 May 2016 18:54:58 +0000 Subject: [PATCH] [Power9] Add support for -mcpu=pwr9 in the back end This patch corresponds to review: http://reviews.llvm.org/D19683 Simply adds the bits for being able to specify -mcpu=pwr9 to the back end. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268950 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Host.cpp | 1 + lib/Target/PowerPC/PPC.td | 5 +++++ lib/Target/PowerPC/PPCAsmPrinter.cpp | 4 +++- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 7 +++++-- lib/Target/PowerPC/PPCISelLowering.cpp | 4 +++- lib/Target/PowerPC/PPCInstrInfo.cpp | 4 ++++ lib/Target/PowerPC/PPCSubtarget.cpp | 2 ++ lib/Target/PowerPC/PPCSubtarget.h | 1 + lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 5 +++-- test/CodeGen/PowerPC/crypto_bifs.ll | 1 + 10 files changed, 28 insertions(+), 6 deletions(-) diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 247f8fd7d76..35c51f77c5b 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -622,6 +622,7 @@ StringRef sys::getHostCPUName() { .Case("POWER7", "pwr7") .Case("POWER8", "pwr8") .Case("POWER8E", "pwr8") + .Case("POWER9", "pwr9") .Default(generic); } #elif defined(__linux__) && defined(__arm__) diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index f0c2b019884..c66e855c3e5 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -47,6 +47,7 @@ def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; +def DirectivePwr9: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR9", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; 
@@ -200,6 +201,8 @@ def ProcessorFeatures { !listconcat(Power7FeatureList, Power8SpecificFeatures); list Power9SpecificFeatures = [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0]; + list Power9FeatureList = + !listconcat(Power8FeatureList, Power9SpecificFeatures); } // Note: Future features to add when support is extended to more @@ -398,6 +401,8 @@ def : ProcessorModel<"pwr6x", G5Model, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; +// FIXME: Same as P8 until the POWER9 scheduling info is available +def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32, FeatureMFTB]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index ac9d19c01a7..ecf184e4b0d 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1306,8 +1306,10 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { "power6", "power6x", "power7", + // FIXME: why is power8 missing here? "ppc64", - "ppc64le" + "ppc64le", + "power9" }; // Get the numerically largest directive. diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 7234e30fa73..caab67d68b1 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -162,8 +162,9 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { unsigned Directive = DAG->MF.getSubtarget().getDarwinDirective(); // If we're using a special group-terminating nop, then we need only one. 
+ // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8 ) + Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9) return 1; return 5 - CurSlots; @@ -223,8 +224,10 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { DAG->MF.getSubtarget().getDarwinDirective(); // If the group has now filled all of its slots, or if we're using a special // group-terminating nop, the group is complete. + // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8 || CurSlots == 6) { + Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 || + CurSlots == 6) { CurGroup.clear(); CurSlots = CurBranches = 0; } else { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 813bf27f009..bb6a0c5bbd3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -916,6 +916,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + case PPC::DIR_PWR9: setPrefFunctionAlignment(4); setPrefLoopAlignment(4); break; @@ -11187,7 +11188,8 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: - case PPC::DIR_PWR8: { + case PPC::DIR_PWR8: + case PPC::DIR_PWR9: { if (!ML) break; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 204dc28ba25..313ab8846ad 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -93,6 +93,7 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, unsigned Directive = DAG->MF.getSubtarget().getDarwinDirective(); + // FIXME: Leaving this as-is until we have POWER9 scheduling info if (Directive == 
PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) return new PPCDispatchGroupSBHazardRecognizer(II, DAG); @@ -181,6 +182,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case PPC::DIR_PWR6X: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + // FIXME: Is this needed for POWER9? Latency += 2; break; } @@ -428,6 +430,8 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ + // FIXME: Update when POWER9 scheduling model is ready. + case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break; } DebugLoc DL; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 57de0b61b1c..b87e5a84c03 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -170,6 +170,8 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_E5500: case PPC::DIR_PWR7: case PPC::DIR_PWR8: + // FIXME: Same as P8 until POWER9 scheduling info is available + case PPC::DIR_PWR9: return true; } } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index da7df797095..55500d3eb4b 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -56,6 +56,7 @@ namespace PPC { DIR_PWR6X, DIR_PWR7, DIR_PWR8, + DIR_PWR9, DIR_64 }; } diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index 85a684a8929..9331e41fb9c 100644 --- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -267,8 +267,9 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) { // For P7 and P8, floating-point instructions have a 6-cycle latency and // there are two execution units, so unroll by 12x for latency hiding. 
- if (Directive == PPC::DIR_PWR7 || - Directive == PPC::DIR_PWR8) + // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 || + Directive == PPC::DIR_PWR9) return 12; // For most things, modern systems have two execution units (and diff --git a/test/CodeGen/PowerPC/crypto_bifs.ll b/test/CodeGen/PowerPC/crypto_bifs.ll index f58935b85b6..62247e8118f 100644 --- a/test/CodeGen/PowerPC/crypto_bifs.ll +++ b/test/CodeGen/PowerPC/crypto_bifs.ll @@ -1,6 +1,7 @@ ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s ; FIXME: The original intent was to add a check-next for the blr after every check. ; However, this currently fails since we don't eliminate stores of the unused -- 2.50.1