From 7fba976622e90ce0aa81f24e66463c6a0b022545 Mon Sep 17 00:00:00 2001
From: Evandro Menezes
Date: Wed, 1 Feb 2017 02:54:42 +0000
Subject: [PATCH] [AArch64] Add new target feature to fuse literal generation

This feature enables the fusion of such operations on Cortex A57, as
recommended in its Software Optimisation Guide, sections 4.14 and 4.15.

Differential revision: https://reviews.llvm.org/D28698

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293739 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AArch64/AArch64.td               |  5 +++++
 lib/Target/AArch64/AArch64MacroFusion.cpp   | 25 +++++++++++++++++++++
 lib/Target/AArch64/AArch64Subtarget.h       |  2 ++
 lib/Target/AArch64/AArch64TargetMachine.cpp | 14 ++++++++++++
 4 files changed, 46 insertions(+)

diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index 1f21da75681..deeb98b7f55 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -107,6 +107,10 @@ def FeatureFuseAES : SubtargetFeature<
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
 
+def FeatureFuseLiterals : SubtargetFeature<
+    "fuse-literals", "HasFuseLiterals", "true",
+    "CPU fuses literal generation operations">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -189,6 +193,7 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
                                    FeatureCustomCheapAsMoveHandling,
                                    FeatureFPARMv8,
                                    FeatureFuseAES,
+                                   FeatureFuseLiterals,
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
diff --git a/lib/Target/AArch64/AArch64MacroFusion.cpp b/lib/Target/AArch64/AArch64MacroFusion.cpp
index f6d693262c3..7919b681dcb 100644
--- a/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -129,6 +129,31 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
              SecondOpcode == AArch64::INSTRUCTION_LIST_END;
     }
 
+  if (ST.hasFuseLiterals())
+    // Fuse literal generation operations.
+    switch (FirstOpcode) {
+    // PC relative address.
+    case AArch64::ADRP:
+      return SecondOpcode == AArch64::ADDXri ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // 32 bit immediate.
+    case AArch64::MOVZWi:
+      return (SecondOpcode == AArch64::MOVKWi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Lower half of 64 bit immediate.
+    case AArch64::MOVZXi:
+      return (SecondOpcode == AArch64::MOVKXi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Upper half of 64 bit immediate.
+    case AArch64::MOVKXi:
+      return First->getOperand(3).getImm() == 32 &&
+             ((SecondOpcode == AArch64::MOVKXi &&
+               Second->getOperand(3).getImm() == 48) ||
+              SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+    }
+
   return false;
 }
 
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 26eac3c6349..a5b88ab2619 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -85,6 +85,7 @@ protected:
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
   bool HasFuseAES = false;
+  bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
   uint8_t MaxInterleaveFactor = 2;
@@ -199,6 +200,7 @@ public:
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
   bool hasFuseAES() const { return HasFuseAES; }
+  bool hasFuseLiterals() const { return HasFuseLiterals; }
   bool useRSqrt() const { return UseRSqrt; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index 63a1acab4e4..3368c984187 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -330,6 +330,20 @@ public:
     return DAG;
   }
 
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+    if (ST.hasFuseLiterals()) {
+      // Run the Macro Fusion after RA again since literals are expanded from
+      // pseudos then (see addPreSched2()).
+      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+      DAG->addMutation(createAArch64MacroFusionDAGMutation());
+      return DAG;
+    }
+
+    return nullptr;
+  }
+
   void addIRPasses() override;
   bool addPreISel() override;
   bool addInstSelector() override;
-- 
2.50.1