From: David Green
Date: Wed, 23 Jan 2019 10:18:30 +0000 (+0000)
Subject: [ARM] Alter the register allocation order for minsize on Thumb2
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a812fe91d644592a101587b608b5e87f41f947a3;p=llvm

[ARM] Alter the register allocation order for minsize on Thumb2

Currently in Arm code, we allocate LR first, under the assumption that
it needs to be saved anyway. Unfortunately this has the disadvantage
that any instruction using it must then take the longer 32-bit Thumb2
encoding rather than the shorter 16-bit Thumb1 one.

This switches the order when we are optimising for minsize, returning
to the default order so that more of the low registers can be used. It
can end up requiring more pushed registers, but on average it produces
smaller code.

Differential Revision: https://reviews.llvm.org/D56008

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351938 91177308-0d34-0410-b5e6-96231b3b80d8
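As an illustration of the effect (this example is not part of the commit), the
first function in the new test file below corresponds to roughly the following
C, and compiling something like it for Thumb2 at -Oz is one way to compare the
two allocation orders:

    /* Rough C equivalent of @test in test/CodeGen/Thumb2/reg-order.ll below.
     * Build, for example, with:
     *   clang --target=thumbv7m-none-eabi -Oz -S reg-order.c
     * When LR is allocated early it holds one of the intermediate sums, and
     * any add using it needs a 32-bit encoding (add.w); with the default
     * order under minsize, low registers are preferred and the 16-bit
     * adds/muls encodings can be used more often. */
    int test(int a, int b, int c, int d) {
      return (d + c) * (b + a) + (d + a) * (c + b);
    }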
---

diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b5179c37621..7b8dc905926 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -204,13 +204,21 @@ def FPINST2 : ARMReg<10, "fpinst2">;
 def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
                                                SP, LR, PC)> {
   // Allocate LR as the first CSR since it is always saved anyway.
+  // For Thumb2, using LR would force 32bit Thumb2 instructions, not the smaller
+  // Thumb1 ones. It is a little better for codesize on average to use the
+  // default order.
   // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
   // know how to spill them. If we make our prologue/epilogue code smarter at
   // some point, we can go back to using the above allocation orders for the
   // Thumb1 instructions that know how to use hi regs.
   let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+    if (MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+      return 2;
+    if (MF.getSubtarget<ARMSubtarget>().isThumb2() &&
+        MF.getFunction().optForMinSize())
+      return 0;
+    return 1;
   }];
   let DiagnosticString = "operand must be a register in range [r0, r15]";
 }
@@ -221,7 +229,12 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
 def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
   let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+    if (MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+      return 2;
+    if (MF.getSubtarget<ARMSubtarget>().isThumb2() &&
+        MF.getFunction().optForMinSize())
+      return 0;
+    return 1;
   }];
   let DiagnosticString = "operand must be a register in range [r0, r14]";
 }
@@ -232,7 +245,12 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> {
 def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)> {
   let AltOrders = [(add LR, GPRnopc), (trunc GPRnopc, 8)];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+    if (MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+      return 2;
+    if (MF.getSubtarget<ARMSubtarget>().isThumb2() &&
+        MF.getFunction().optForMinSize())
+      return 0;
+    return 1;
   }];
   let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
 }
@@ -253,7 +271,12 @@ def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> {
 def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
   let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
   let AltOrderSelect = [{
-    return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
+    if (MF.getSubtarget<ARMSubtarget>().isThumb1Only())
+      return 2;
+    if (MF.getSubtarget<ARMSubtarget>().isThumb2() &&
+        MF.getFunction().optForMinSize())
+      return 0;
+    return 1;
   }];
   let DiagnosticType = "rGPR";
 }
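A note on how the returned index is interpreted: 0 selects the register
class's default allocation order, and a value of N selects AltOrders[N-1],
which is why returning 0 above means "use the default order". The following
standalone C sketch restates the new selection logic (the function and
parameter names are illustrative, not code from the tree; the real code
queries the ARMSubtarget):

    #include <stdbool.h>

    /* Sketch of the AltOrderSelect logic added above. Return value:
     *   0 = default order (R0-R12, SP, LR, PC as declared),
     *   1 = AltOrders[0] (LR allocated first),
     *   2 = AltOrders[1] (low registers r0-r7 only). */
    unsigned selectGPRAllocationOrder(bool isThumb1Only, bool isThumb2,
                                      bool optForMinSize) {
      if (isThumb1Only)
        return 2; /* Thumb1: only low regs; we don't know how to spill hi regs */
      if (isThumb2 && optForMinSize)
        return 0; /* minsize: prefer low regs so 16-bit encodings stay usable */
      return 1;   /* otherwise allocate LR first, since it is saved anyway */
    }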
diff --git a/test/CodeGen/Thumb2/reg-order.ll b/test/CodeGen/Thumb2/reg-order.ll
new file mode 100644
index 00000000000..d56583727c7
--- /dev/null
+++ b/test/CodeGen/Thumb2/reg-order.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s
+
+
+define i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) #0 {
+; CHECK-LABEL: test:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    adds r4, r3, r0
+; CHECK-NEXT:    add.w r12, r2, r1
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    adds r1, r3, r2
+; CHECK-NEXT:    mul r4, r4, r12
+; CHECK-NEXT:    mla r0, r1, r0, r4
+; CHECK-NEXT:    pop {r4, pc}
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %d, %c
+  %mul = mul nsw i32 %add1, %add
+  %add2 = add nsw i32 %d, %a
+  %add3 = add nsw i32 %c, %b
+  %mul4 = mul nsw i32 %add2, %add3
+  %add5 = add nsw i32 %mul, %mul4
+  ret i32 %add5
+}
+
+define void @loop(i32 %I, i8* %A, i8* %B) #0 {
+; CHECK-LABEL: loop:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
+; CHECK-NEXT:    mov.w r12, #0
+; CHECK-NEXT:    b .LBB1_2
+; CHECK-NEXT:  .LBB1_1: @ %for.body
+; CHECK-NEXT:    @ in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT:    add.w r4, r12, r12, lsl #1
+; CHECK-NEXT:    add.w r3, r2, r12, lsl #2
+; CHECK-NEXT:    add r4, r1
+; CHECK-NEXT:    add.w r12, r12, #1
+; CHECK-NEXT:    ldrsb.w r6, [r4, #2]
+; CHECK-NEXT:    ldrsb.w r5, [r4]
+; CHECK-NEXT:    mov r7, r6
+; CHECK-NEXT:    cmp r5, r6
+; CHECK-NEXT:    it gt
+; CHECK-NEXT:    movgt r7, r5
+; CHECK-NEXT:    ldrsb.w r4, [r4, #1]
+; CHECK-NEXT:    cmp r7, r4
+; CHECK-NEXT:    it le
+; CHECK-NEXT:    movle r7, r4
+; CHECK-NEXT:    subs r4, r7, r4
+; CHECK-NEXT:    subs r6, r7, r6
+; CHECK-NEXT:    strb r6, [r3, #3]
+; CHECK-NEXT:    strb r4, [r3, #2]
+; CHECK-NEXT:    subs r4, r7, r5
+; CHECK-NEXT:    strb r4, [r3, #1]
+; CHECK-NEXT:    mvns r4, r7
+; CHECK-NEXT:    strb r4, [r3]
+; CHECK-NEXT:  .LBB1_2: @ %for.cond
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    cmp r12, r0
+; CHECK-NEXT:    blt .LBB1_1
+; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
+; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %A.addr.0 = phi i8* [ %A, %entry ], [ %incdec.ptr2, %for.body ]
+  %B.addr.0 = phi i8* [ %B, %entry ], [ %incdec.ptr47, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %I
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %for.cond
+  %incdec.ptr = getelementptr inbounds i8, i8* %A.addr.0, i32 1
+  %0 = load i8, i8* %A.addr.0, align 1
+  %incdec.ptr1 = getelementptr inbounds i8, i8* %A.addr.0, i32 2
+  %1 = load i8, i8* %incdec.ptr, align 1
+  %incdec.ptr2 = getelementptr inbounds i8, i8* %A.addr.0, i32 3
+  %2 = load i8, i8* %incdec.ptr1, align 1
+  %3 = icmp sgt i8 %0, %2
+  %4 = select i1 %3, i8 %0, i8 %2
+  %5 = icmp sgt i8 %4, %1
+  %6 = select i1 %5, i8 %4, i8 %1
+  %7 = xor i8 %6, -1
+  %sub34 = sub i8 %6, %0
+  %sub38 = sub i8 %6, %1
+  %sub42 = sub i8 %6, %2
+  %incdec.ptr44 = getelementptr inbounds i8, i8* %B.addr.0, i32 1
+  store i8 %7, i8* %B.addr.0, align 1
+  %incdec.ptr45 = getelementptr inbounds i8, i8* %B.addr.0, i32 2
+  store i8 %sub34, i8* %incdec.ptr44, align 1
+  %incdec.ptr46 = getelementptr inbounds i8, i8* %B.addr.0, i32 3
+  store i8 %sub38, i8* %incdec.ptr45, align 1
+  %incdec.ptr47 = getelementptr inbounds i8, i8* %B.addr.0, i32 4
+  store i8 %sub42, i8* %incdec.ptr46, align 1
+  %inc = add nuw nsw i32 %i.0, 1
+  br label %for.cond
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+}
+
+
+attributes #0 = { minsize optsize }
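For reference, the @loop function in the test corresponds to roughly the
following C (a reconstruction from the IR, not part of the commit); it is the
kind of byte-manipulating loop where keeping values in low registers shortens
many of the loads, stores and subtractions:

    /* Rough C equivalent of @loop in the test above, reconstructed from the
     * IR. Each iteration reads three signed bytes, computes their maximum m,
     * then writes ~m followed by the three differences from m. */
    void loop(int I, signed char *A, signed char *B) {
      for (int i = 0; i < I; i++) {
        signed char a = A[0], b = A[1], c = A[2];
        signed char m = a > c ? a : c; /* max(a, c) */
        if (b > m)                     /* m = max(a, b, c) */
          m = b;
        A += 3;
        *B++ = ~m;
        *B++ = m - a;
        *B++ = m - b;
        *B++ = m - c;
      }
    }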