]> granicus.if.org Git - llvm/commitdiff
[AArch64] Add new target feature to fuse literal generation
authorEvandro Menezes <e.menezes@samsung.com>
Wed, 1 Feb 2017 02:54:42 +0000 (02:54 +0000)
committerEvandro Menezes <e.menezes@samsung.com>
Wed, 1 Feb 2017 02:54:42 +0000 (02:54 +0000)
This feature enables the fusion of such operations on Cortex A57, as
recommended in its Software Optimisation Guide, sections 4.14 and 4.15.

Differential revision: https://reviews.llvm.org/D28698

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293739 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AArch64/AArch64.td
lib/Target/AArch64/AArch64MacroFusion.cpp
lib/Target/AArch64/AArch64Subtarget.h
lib/Target/AArch64/AArch64TargetMachine.cpp

index 1f21da75681aaeb224b358a704740dff520aeb9c..deeb98b7f5529d9e8bf64f6bc9ea72cfc259454a 100644 (file)
@@ -107,6 +107,10 @@ def FeatureFuseAES : SubtargetFeature<
     "fuse-aes", "HasFuseAES", "true",
     "CPU fuses AES crypto operations">;
 
+def FeatureFuseLiterals : SubtargetFeature<
+    "fuse-literals", "HasFuseLiterals", "true",
+    "CPU fuses literal generation operations">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -189,6 +193,7 @@ def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
                                    FeatureCustomCheapAsMoveHandling,
                                    FeatureFPARMv8,
                                    FeatureFuseAES,
+                                   FeatureFuseLiterals,
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
index f6d693262c363e0ee1b21d9aa14e02c3118a8757..7919b681dcb9690c3736dd0c2907a750c802c4c1 100644 (file)
@@ -129,6 +129,31 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
              SecondOpcode == AArch64::INSTRUCTION_LIST_END;
     }
 
+  if (ST.hasFuseLiterals())
+    // Fuse literal generation operations.
+    switch (FirstOpcode) {
+    // PC relative address.
+    case AArch64::ADRP:
+      return SecondOpcode == AArch64::ADDXri ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // 32 bit immediate.
+    case AArch64::MOVZWi:
+      return (SecondOpcode == AArch64::MOVKWi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Lower half of 64 bit immediate.
+    case AArch64::MOVZXi:
+      return (SecondOpcode == AArch64::MOVKXi &&
+              Second->getOperand(3).getImm() == 16) ||
+             SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+    // Upper half of 64 bit immediate.
+    case AArch64::MOVKXi:
+      return First->getOperand(3).getImm() == 32 &&
+             ((SecondOpcode == AArch64::MOVKXi &&
+               Second->getOperand(3).getImm() == 48) ||
+              SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+    }
+
   return false;
 }
 
index 26eac3c63493fb2f598926f6f0ba6b017b47cb6b..a5b88ab26199ee7e7f1e084f70c0f929bb9bedbf 100644 (file)
@@ -85,6 +85,7 @@ protected:
   bool HasArithmeticBccFusion = false;
   bool HasArithmeticCbzFusion = false;
   bool HasFuseAES = false;
+  bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
   uint8_t MaxInterleaveFactor = 2;
@@ -199,6 +200,7 @@ public:
   bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
   bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
   bool hasFuseAES() const { return HasFuseAES; }
+  bool hasFuseLiterals() const { return HasFuseLiterals; }
   bool useRSqrt() const { return UseRSqrt; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
index 63a1acab4e44d0e7b501ea0cc78bad6d7b5bdb9d..3368c98418791fabee57c966d22cc0732ddb3478 100644 (file)
@@ -330,6 +330,20 @@ public:
     return DAG;
   }
 
+  ScheduleDAGInstrs *
+  createPostMachineScheduler(MachineSchedContext *C) const override {
+    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+    if (ST.hasFuseLiterals()) {
+      // Run the Macro Fusion after RA again since literals are expanded from
+      // pseudos then (v. addPreSched2()).
+      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+      DAG->addMutation(createAArch64MacroFusionDAGMutation());
+      return DAG;
+    }
+
+    return nullptr;
+  }
+
   void addIRPasses()  override;
   bool addPreISel() override;
   bool addInstSelector() override;