def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+// Fast execution of back-to-back bottom and top halves of literal
+// generation (MOVW followed by MOVT).
+def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
// The way of reading thread pointer
def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
"Reading thread pointer from register">;
namespace llvm {
+// Return true if FirstMI followed by SecondMI is an AES crypto pair (encode
+// or decode) that can be fused.  A null FirstMI acts as a wildcard: the
+// result then only tells whether SecondMI can be the tail of such a pair.
+static bool isAESPair(const MachineInstr *FirstMI,
+                      const MachineInstr &SecondMI) {
+  // Pick the opcode that has to precede SecondMI for the pair to fuse.
+  unsigned RequiredFirst;
+  switch (SecondMI.getOpcode()) {
+  case ARM::AESMC: // AES encode: AESE + AESMC.
+    RequiredFirst = ARM::AESE;
+    break;
+  case ARM::AESIMC: // AES decode: AESD + AESIMC.
+    RequiredFirst = ARM::AESD;
+    break;
+  default:
+    // SecondMI never ends an AES pair.
+    return false;
+  }
+
+  // An unspecified first instr matches anything.
+  return !FirstMI || FirstMI->getOpcode() == RequiredFirst;
+}
+
+// Return true if FirstMI followed by SecondMI is a literal generation pair
+// that can be fused.  A null FirstMI acts as a wildcard: the result then
+// only tells whether SecondMI can be the tail of such a pair.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+                           const MachineInstr &SecondMI) {
+  // Only the top half of a 32 bit immediate can end a pair.
+  if (SecondMI.getOpcode() != ARM::MOVTi16)
+    return false;
+
+  // The bottom half must come first, unless the first instr is unspecified.
+  return !FirstMI || FirstMI->getOpcode() == ARM::MOVi16;
+}
+
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
const MachineInstr &SecondMI) {
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
- // Assume wildcards for unspecified instrs.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- if (ST.hasFuseAES())
- // Fuse AES crypto operations.
- switch(SecondOpcode) {
- // AES encode.
- case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- // AES decode.
- case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- }
+ if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
+ return true;
+ if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
+ return true;
return false;
}
/// pairs faster.
bool HasFuseAES = false;
+ /// HasFuseLiterals - if true, the processor executes the bottom and top
+ /// halves of a literal generation (movw followed by movt) faster when
+ /// they are back to back.
+ bool HasFuseLiterals = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
bool hasFullFP16() const { return HasFullFP16; }
bool hasFuseAES() const { return HasFuseAES; }
+ /// Return true if the CPU fuses literal generation operations.
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
- bool hasFusion() const { return hasFuseAES(); }
+ bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
const Triple &getTargetTriple() const { return TargetTriple; }
--- /dev/null
+; Verify that the scheduler keeps the movw/movt halves of a literal back to
+; back when fuse-literals is enabled, and may separate them when it is not.
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=-fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT
+; RUN: llc %s -o - -mtriple=armv8-unknown -mattr=+fuse-literals,+use-misched | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE
+
+@g = common global i32* zeroinitializer
+
+; Pointer literals: the addresses of @litp and @g each need a movw/movt pair.
+; Without fusion the second movw directly follows the first one; with fusion
+; every movw is immediately followed by its matching movt.
+define i32* @litp(i32 %a, i32 %b) {
+entry:
+ %add = add nsw i32 %b, %a
+ %ptr = getelementptr i32, i32* bitcast (i32* (i32, i32)* @litp to i32*), i32 %add
+ %res = getelementptr i32, i32* bitcast (i32** @g to i32*), i32 %add
+ store i32* %ptr, i32** @g, align 4
+ ret i32* %res
+
+; CHECK-LABEL: litp:
+; CHECK: movw [[R:r[0-9]+]], :lower16:litp
+; CHECKDONT-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[R]], :upper16:litp
+; CHECKFUSE-NEXT: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+}
+
+; Integer literals: 121110837 (0x07380135, movw #309 / movt #1848) and
+; -262095121 (0xF060BEEF, movw #48879 / movt #61536), plus the address of @g.
+; With fusion each movw is immediately followed by its matching movt; without
+; it an add is scheduled right after the first movw.
+define i32 @liti(i32 %a, i32 %b) {
+entry:
+ %adda = add i32 %a, -262095121
+ %add1 = add i32 %adda, %b
+ %addb = add i32 %b, 121110837
+ %add2 = add i32 %addb, %a
+ store i32 %add1, i32* bitcast (i32** @g to i32*), align 4
+ ret i32 %add2
+
+; CHECK-LABEL: liti:
+; CHECK: movw [[R:r[0-9]+]], #309
+; CHECKDONT-NEXT: add {{r[0-9]+}}, {{r[0-9]+}}, {{r[0-9]+}}
+; CHECKFUSE-NEXT: movt [[R]], #1848
+; CHECKFUSE: movw [[S:r[0-9]+]], :lower16:g
+; CHECKFUSE-NEXT: movt [[S]], :upper16:g
+; CHECKFUSE-NEXT: movw [[T:r[0-9]+]], #48879
+; CHECKFUSE-NEXT: movt [[T]], #61536
+}