This feature enables the fusion of such operations on Cortex A57, as
recommended in its Software Optimisation Guide, sections 4.14 and 4.15.
Differential revision: https://reviews.llvm.org/D28698
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@293739
91177308-0d34-0410-b5e6-
96231b3b80d8
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+def FeatureFuseLiterals : SubtargetFeature<
+ "fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureFuseAES,
+ FeatureFuseLiterals,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
}
+ if (ST.hasFuseLiterals())
+ // Fuse literal generation operations.
+ switch (FirstOpcode) {
+ // PC relative address.
+ case AArch64::ADRP:
+ return SecondOpcode == AArch64::ADDXri ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // 32 bit immediate.
+ case AArch64::MOVZWi:
+ return (SecondOpcode == AArch64::MOVKWi &&
+ Second->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Lower half of 64 bit immediate.
+ case AArch64::MOVZXi:
+ return (SecondOpcode == AArch64::MOVKXi &&
+ Second->getOperand(3).getImm() == 16) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END;
+ // Upper half of 64 bit immediate.
+ case AArch64::MOVKXi:
+ return First->getOperand(3).getImm() == 32 &&
+ ((SecondOpcode == AArch64::MOVKXi &&
+ Second->getOperand(3).getImm() == 48) ||
+ SecondOpcode == AArch64::INSTRUCTION_LIST_END);
+ }
+
return false;
}
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool HasFuseAES = false;
+ bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return DAG;
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+ if (ST.hasFuseLiterals()) {
+ // Run the Macro Fusion after RA again since literals are expanded from
+ // pseudos then (v. addPreSched2()).
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ DAG->addMutation(createAArch64MacroFusionDAGMutation());
+ return DAG;
+ }
+
+ return nullptr;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;