/// This is called after a .mir file was loaded.
virtual void mirFileLoaded(MachineFunction &MF) const;
+
+ /// True if the register allocator should use the allocation orders exactly as
+ /// written in the tablegen descriptions, false if it should allocate
+ /// the specified physical register later if it is callee-saved.
+ virtual bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ return false;
+ }
};
} // end namespace llvm
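
For orientation, a target opts in by overriding the new hook on its
subtarget. A minimal hypothetical sketch (MySubtarget and LowRegsRegClass are
illustrative names only; ARM's real override appears later in this patch):

    // Hypothetical override: keep CSR-aliasing registers in their raw
    // tablegen positions when the function is optimised for minimum size.
    bool MySubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                                  unsigned PhysReg) const {
      return MF.getFunction().hasMinSize() &&
             MyTarget::LowRegsRegClass.contains(PhysReg);
    }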
void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
assert(RC && "no register class given");
RCInfo &RCI = RegClass[RC->getID()];
+ auto &STI = MF->getSubtarget();
// Raw register count, including all reserved regs.
unsigned NumRegs = RC->getNumRegs();
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CalleeSavedAliases[PhysReg])
+ if (CalleeSavedAliases[PhysReg] &&
+ !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
// know how to spill them. If we make our prologue/epilogue code smarter at
// some point, we can go back to using the above allocation orders for the
// Thumb1 instructions that know how to use hi regs.
- let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8),
+ (add (trunc GPR, 8), R12, LR, (shl GPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r15]";
}
// certain operand slots, particularly as the destination. Primarily
// useful for disassembly.
def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
- let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
+ let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8),
+ (add (trunc GPRnopc, 8), R12, LR, (shl GPRnopc, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticString = "operand must be a register in range [r0, r14]";
}
// or SP (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
- let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8),
+ (add (trunc rGPR, 8), R12, LR, (shl rGPR, 8))];
let AltOrderSelect = [{
- return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
}];
let DiagnosticType = "rGPR";
}
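
A note on the indices used by AltOrderSelect (and returned by
getGPRAllocationOrder below): 0 selects the class's default tablegen order,
and N selects AltOrders[N-1]. The generated selector has roughly this shape
(an illustrative sketch, not the exact TableGen output; register lists
abbreviated):

    // Sketch of the generated raw-order selector for GPR; GPRnopc and rGPR
    // get analogous selectors.
    static ArrayRef<MCPhysReg>
    GPRGetRawAllocationOrder(const MachineFunction &MF) {
      static const MCPhysReg AltOrder1[] = { ARM::LR, ARM::R0 /* ..., R12 */ };
      static const MCPhysReg AltOrder2[] = { ARM::R0 /* ..., R7 */ };
      static const MCPhysReg AltOrder3[] = { ARM::R0, /* ..., R7, */ ARM::R12,
                                             ARM::LR, ARM::R8 /* ..., R11 */ };
      const MCRegisterClass &MCR = ARMMCRegisterClasses[ARM::GPRRegClassID];
      const ArrayRef<MCPhysReg> Order[] = {
          makeArrayRef(MCR.begin(), MCR.getNumRegs()), // 0: default order
          AltOrder1,                                   // 1: r14, r0-r13
          AltOrder2,                                   // 2: r0-r7
          AltOrder3};                                  // 3: r0-r7, r12, lr, r8-r11
      // The AltOrderSelect body computes the index:
      unsigned Select = MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
      assert(Select < 4 && "Invalid alternate register order");
      return Order[Select];
    }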
((isTargetMachO() && !isThumb1Only()) ||
(isTargetLinux() && !isThumb()) || (isTargetNaCl() && !isThumb()));
}
+
+unsigned ARMSubtarget::getGPRAllocationOrder(const MachineFunction &MF) const {
+ // The GPR register class has multiple possible allocation orders, with
+ // tradeoffs preferred by different sub-architectures and optimisation goals.
+ // The allocation orders are:
+ // 0: (the default tablegen order, not used)
+ // 1: r14, r0-r13
+ // 2: r0-r7
+ // 3: r0-r7, r12, lr, r8-r11
+ // Note that the register allocator will normally change this order so that
+ // callee-saved registers are used later, as they require extra work in the
+ // prologue/epilogue (though we override that via ignoreCSRForAllocationOrder
+ // for Thumb2 functions optimised for minimum size).
+
+ // For Thumb1-only targets, only the low registers are allocatable.
+ if (isThumb1Only())
+ return 2;
+
+ // Allocate low registers first, so we can select more 16-bit instructions.
+ // We also (in ignoreCSRForAllocationOrder) override the default behaviour
+ // with regard to callee-saved registers, because pushing extra registers is
+ // much cheaper (in terms of code size) than using high registers. After
+ // that, we allocate r12 (which doesn't need to be saved), lr (saving it lets
+ // us return with the pop, avoiding a separate "bx lr"), and then the rest of
+ // the high registers.
+ if (isThumb2() && MF.getFunction().hasMinSize())
+ return 3;
+
+ // Otherwise, use order 1, which tries LR first: saving LR means we can
+ // return with the pop, giving a shorter epilogue sequence.
+ return 1;
+}
+
+bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const {
+ // To minimize code size in Thumb2, we prefer the usage of low regs (lower
+ // cost per use) so we can use narrow encodings. By default, caller-saved
+ // registers (e.g. lr, r12) are always allocated first, regardless of
+ // their cost per use. When optimising for minimum size, we prefer the low
+ // regs even when they are CSRs, because saving and restoring them can
+ // usually be folded into the push/pop the function already needs.
+ return isThumb2() && MF.getFunction().hasMinSize() &&
+ ARM::GPRRegClass.contains(PhysReg);
+}
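
To see how the two pieces compose, here is a small standalone toy model
(illustrative only; it mimics, in simplified form, the CSR-deferral logic in
RegisterClassInfo::compute). Its two printed orders match the two
AllocationOrder DEBUG lines checked by the new test at the end of this patch:

    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      using Order = std::vector<std::string>;
      // Raw orders from getGPRAllocationOrder (sp and pc are reserved and
      // never appear in the computed order).
      const Order Order1 = {"lr", "r0", "r1", "r2",  "r3",  "r4",  "r5",
                            "r6", "r7", "r8", "r9", "r10", "r11", "r12"};
      const Order Order3 = {"r0", "r1",  "r2", "r3", "r4",  "r5",  "r6",
                            "r7", "r12", "lr", "r8", "r9", "r10", "r11"};
      // Callee-saved GPRs as modelled here: r4-r11 and lr.
      auto IsCSR = [](const std::string &R) {
        if (R == "lr")
          return true;
        int N = std::stoi(R.substr(1)); // "rN" -> N
        return N >= 4 && N <= 11;
      };
      auto Compute = [&](const Order &Raw, bool IgnoreCSR) {
        Order Result, Deferred;
        for (const std::string &R : Raw)
          (IsCSR(R) && !IgnoreCSR ? Deferred : Result).push_back(R);
        // CSR-aliasing registers go to the back, unless the target opted out.
        Result.insert(Result.end(), Deferred.begin(), Deferred.end());
        return Result;
      };
      auto Print = [](const Order &O) {
        for (const std::string &R : O)
          std::cout << R << ' ';
        std::cout << '\n';
      };
      // Thumb2 + minsize: order 3, CSR deferral suppressed:
      Print(Compute(Order3, /*IgnoreCSR=*/true)); // r0-r7 r12 lr r8-r11
      // Thumb2 without minsize: order 1, CSRs deferred as usual:
      Print(Compute(Order1, /*IgnoreCSR=*/false)); // r0-r3 r12 lr r4-r11
    }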
unsigned getPrefLoopAlignment() const {
return PrefLoopAlignment;
}
+
+ bool ignoreCSRForAllocationOrder(const MachineFunction &MF,
+ unsigned PhysReg) const override;
+ unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
};
} // end namespace llvm
while.body:
; CHECK: while.body
-; CHECK: mul r{{[0-9]+}}
+; CHECK: muls r{{[0-9]+}}
; CHECK: muls
%ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
%ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
--- /dev/null
+; REQUIRES: asserts
+; RUN: llc -debug-only=regalloc < %s 2>%t | FileCheck %s --check-prefix=CHECK
+; RUN: FileCheck %s < %t --check-prefix=DEBUG
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64"
+target triple = "thumbv7m--linux-gnueabi"
+
+; DEBUG: AllocationOrder(GPR) = [ $r0 $r1 $r2 $r3 $r4 $r5 $r6 $r7 $r12 $lr $r8 $r9 $r10 $r11 ]
+
+define i32 @test_minsize(i32 %x) optsize minsize {
+; CHECK-LABEL: test_minsize:
+entry:
+; CHECK: mov r4, r0
+ tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
+; CHECK: mov r0, r4
+ ret i32 %x
+}
+
+; DEBUG: AllocationOrder(GPR) = [ $r0 $r1 $r2 $r3 $r12 $lr $r4 $r5 $r6 $r7 $r8 $r9 $r10 $r11 ]
+
+define i32 @test_optsize(i32 %x) optsize {
+; CHECK-LABEL: test_optsize:
+entry:
+; CHECK: mov r12, r0
+ tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3}"()
+; CHECK: mov r0, r12
+ ret i32 %x
+}