From: Sam Parker Date: Tue, 25 Jun 2019 15:11:17 +0000 (+0000) Subject: [ARM] Fix for DLS/LE CodeGen X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b0917b06be3b09f5693c139ae36cc55ade28f209;p=llvm [ARM] Fix for DLS/LE CodeGen The expensive buildbots highlighted that the MIR tests were broken; I've now updated them and added --verify-machineinstrs to them. This also uncovered a couple of bugs in the backend pass, so these have also been fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364323 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/ARM/ARMLowOverheadLoops.cpp b/lib/Target/ARM/ARMLowOverheadLoops.cpp index b7f3e5bd350..6a3709dc03f 100644 --- a/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -74,8 +74,8 @@ INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) bool ARMLowOverheadLoops::runOnMachineFunction(MachineFunction &MF) { - //if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB()) - //return false; + if (!static_cast<const ARMSubtarget&>(MF.getSubtarget()).hasLOB()) + return false; LLVM_DEBUG(dbgs() << "ARM Loops on " << MF.getName() << " ------------- \n"); @@ -133,16 +133,15 @@ bool ARMLowOverheadLoops::ProcessLoop(MachineLoop *ML) { Dec = &MI; else if (MI.getOpcode() == ARM::t2LoopEnd) End = &MI; + else if (MI.getDesc().isCall()) + // TODO: Though the call will require LE to execute again, does this + // mean we should revert? Always executing LE hopefully should be + // faster than performing a sub,cmp,br or even subs,br. + Revert = true; if (!Dec) continue; - // TODO: Though the call will require LE to execute again, does this - // mean we should revert? Always executing LE hopefully should be faster - // than performing a sub,cmp,br or even subs,br. - if (MI.getDesc().isCall()) - Revert = true; - // If we find that we load/store LR between LoopDec and LoopEnd, expect // that the decremented value has been spilled to the stack. 
Because // this value isn't actually going to be produced until the latch, by LE, @@ -272,11 +271,13 @@ void ARMLowOverheadLoops::Expand(MachineLoop *ML, MachineInstr *Start, MIB.addReg(ARM::LR); MIB.addImm(0); MIB.addImm(ARMCC::AL); + MIB.addReg(ARM::CPSR); // Create bne MIB = BuildMI(*MBB, End, End->getDebugLoc(), TII->get(ARM::t2Bcc)); MIB.add(End->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code + MIB.addReg(ARM::CPSR); End->eraseFromParent(); Dec->eraseFromParent(); }; diff --git a/test/Transforms/HardwareLoops/ARM/revert-after-call.mir b/test/Transforms/HardwareLoops/ARM/revert-after-call.mir index f334a2d3c5b..fd8170e14f4 100644 --- a/test/Transforms/HardwareLoops/ARM/revert-after-call.mir +++ b/test/Transforms/HardwareLoops/ARM/revert-after-call.mir @@ -1,18 +1,14 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s -# CHECK: .LBB0_2: -# CHECK: sub.w lr, lr, #1 -# CHECK: mov [[TMP:r[0-9]+]], lr -# CHECK: bl bar -# CHECK: mov lr, [[TMP]] -# CHECK: cmp.w lr, #0 -# CHECK: bne{{.*}} .LBB0_2 +# CHECK: while.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate --- | target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8.1m.main-arm-none-eabi" - define i32 @skip_call(i32 %n) #0 { + define i32 @skip_spill(i32 %n) #0 { entry: %cmp6 = icmp eq i32 %n, 0 br i1 %cmp6, label %while.end, label %while.body.preheader @@ -38,7 +34,6 @@ declare i32 @bar(...) local_unnamed_addr #0 declare void @llvm.set.loop.iterations.i32(i32) #1 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - declare void @llvm.stackprotector(i8*, i8**) #2 attributes #0 = { "target-features"="+mve.fp" } attributes #1 = { noduplicate nounwind } @@ -46,14 +41,14 @@ ... 
--- -name: skip_call +name: skip_spill alignment: 1 exposesReturnsTwice: false legalized: false regBankSelected: false selected: false failedISel: false -tracksRegLiveness: true +tracksRegLiveness: false hasWinCFI: false registers: [] liveins: @@ -95,47 +90,41 @@ constants: [] machineFunctionInfo: {} body: | bb.0.entry: - successors: %bb.1(0x30000000), %bb.3(0x50000000) - liveins: $r0, $r4, $r5, $r7, $lr + successors: %bb.4(0x30000000), %bb.1(0x50000000) - $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr + frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.1, 0, killed $cpsr + tCBZ $r0, %bb.4 - bb.3.while.body.preheader: - successors: %bb.4(0x80000000) - liveins: $r0 + bb.1.while.body.preheader: + successors: %bb.2(0x80000000) $lr = tMOVr $r0, 14, $noreg - renamable $r4 = t2MOVi 0, 14, $noreg, $noreg + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg t2DoLoopStart killed $r0 - bb.4.while.body: - successors: %bb.4(0x7c000000), %bb.2(0x04000000) - liveins: $lr, $r4 + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) - renamable $lr = t2LoopDec killed renamable $lr, 1 $r5 = tMOVr killed $lr, 14, $noreg tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 $lr = tMOVr killed $r5, 14, $noreg - renamable $r4 = nsw t2ADDrr killed renamable $r0, killed renamable $r4, 14, $noreg, $noreg - t2LoopEnd renamable $lr, %bb.4 - t2B %bb.2, 14, $noreg - - bb.2.while.end: - liveins: $r4 + renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd 
renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + bb.3.while.end: $r0 = tMOVr killed $r4, 14, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - bb.1: - renamable $r4 = t2MOVi 0, 14, $noreg, $noreg + bb.4: + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg $r0 = tMOVr killed $r4, 14, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 ... diff --git a/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir b/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir index 63310f2b4c5..fd8170e14f4 100644 --- a/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir +++ b/test/Transforms/HardwareLoops/ARM/revert-after-spill.mir @@ -1,11 +1,8 @@ -# RUN: llc -mtriple=thumbv8.1m.main %s -o - | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main %s -run-pass=arm-low-overhead-loops --verify-machineinstrs -o - | FileCheck %s -# CHECK: .LBB0_2: -# CHECK: sub.w lr, lr, #1 -# CHECK: str.w lr, [sp, #12] -# CHECK: ldr.w lr, [sp, #12] -# CHECK: cmp.w lr, #0 -# CHECK: bne{{.*}} .LBB0_2 +# CHECK: while.body: +# CHECK-NOT: t2DLS +# CHECK-NOT: t2LEUpdate --- | target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" @@ -37,7 +34,6 @@ declare i32 @bar(...) 
local_unnamed_addr #0 declare void @llvm.set.loop.iterations.i32(i32) #1 declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #1 - declare void @llvm.stackprotector(i8*, i8**) #2 attributes #0 = { "target-features"="+mve.fp" } attributes #1 = { noduplicate nounwind } @@ -52,7 +48,7 @@ legalized: false regBankSelected: false selected: false failedISel: false -tracksRegLiveness: true +tracksRegLiveness: false hasWinCFI: false registers: [] liveins: @@ -94,46 +90,41 @@ constants: [] machineFunctionInfo: {} body: | bb.0.entry: - successors: %bb.1(0x30000000), %bb.3(0x50000000) - liveins: $r0, $r4, $r5, $r7, $lr + successors: %bb.4(0x30000000), %bb.1(0x50000000) - $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr + frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp frame-setup CFI_INSTRUCTION def_cfa_offset 16 frame-setup CFI_INSTRUCTION offset $lr, -4 frame-setup CFI_INSTRUCTION offset $r7, -8 frame-setup CFI_INSTRUCTION offset $r5, -12 frame-setup CFI_INSTRUCTION offset $r4, -16 - t2CMPri $r0, 0, 14, $noreg, implicit-def $cpsr - t2Bcc %bb.1, 0, killed $cpsr + tCBZ $r0, %bb.4 - bb.3.while.body.preheader: - successors: %bb.4(0x80000000) - liveins: $r0 + bb.1.while.body.preheader: + successors: %bb.2(0x80000000) $lr = tMOVr $r0, 14, $noreg - renamable $r4 = t2MOVi 0, 14, $noreg, $noreg + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg t2DoLoopStart killed $r0 - bb.4.while.body: - successors: %bb.4(0x7c000000), %bb.2(0x04000000) - liveins: $lr, $r4 + bb.2.while.body: + successors: %bb.2(0x7c000000), %bb.3(0x04000000) + $r5 = tMOVr killed $lr, 14, $noreg + tBL 14, $noreg, @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $r0 + $lr = tMOVr killed $r5, 14, $noreg + renamable $r4 = nsw tADDhirr killed renamable $r4, killed renamable $r0, 14, $noreg renamable $lr = t2LoopDec killed renamable $lr, 1 - t2STRi12 $lr, %stack.0, 0, 14, 
$noreg :: (store 4) - $lr = t2LDRi12 %stack.0, 0, 14, $noreg :: (load 4) - renamable $r4 = nsw t2ADDrr renamable $lr, killed renamable $r4, 14, $noreg, $noreg - t2LoopEnd renamable $lr, %bb.4 - t2B %bb.2, 14, $noreg - - bb.2.while.end: - liveins: $r4 + t2LoopEnd renamable $lr, %bb.2 + tB %bb.3, 14, $noreg + bb.3.while.end: $r0 = tMOVr killed $r4, 14, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 - bb.1: - renamable $r4 = t2MOVi 0, 14, $noreg, $noreg + bb.4: + renamable $r4, dead $cpsr = tMOVi8 0, 14, $noreg $r0 = tMOVr killed $r4, 14, $noreg - $sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc, implicit killed $r0 ...