From 617c526c5c6a4172796b3f8652cbc909df5c8cd9 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 2 Mar 2017 21:39:39 +0000 Subject: [PATCH] [ARM] Fix insert point for store rescheduling. In ARMPreAllocLoadStoreOpt::RescheduleOps, LastOp should be the last operation which we want to merge. If we break out of the loop because an operation has the wrong offset, we shouldn't use that operation as LastOp. This patch fixes some cases where we would move stores to the wrong insert point. Re-commit with a fix to increment NumMove in the right place. Differential Revision: https://reviews.llvm.org/D30124 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296815 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 31 +++++++++++-------- test/CodeGen/ARM/ldm-stm-i256.ll | 38 ++++++++++++++++++++++++ test/CodeGen/ARM/prera-ldst-insertpt.mir | 32 ++++++++++++-------- 3 files changed, 76 insertions(+), 25 deletions(-) create mode 100644 test/CodeGen/ARM/ldm-stm-i256.ll diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index f9eec7d4006..032123b48e9 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2161,33 +2161,40 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, unsigned LastBytes = 0; unsigned NumMove = 0; for (int i = Ops.size() - 1; i >= 0; --i) { + // Make sure each operation has the same kind. MachineInstr *Op = Ops[i]; - unsigned Loc = MI2LocMap[Op]; - if (Loc <= FirstLoc) { - FirstLoc = Loc; - FirstOp = Op; - } - if (Loc >= LastLoc) { - LastLoc = Loc; - LastOp = Op; - } - unsigned LSMOpcode = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia); if (LastOpcode && LSMOpcode != LastOpcode) break; + // Check that we have a continuous set of offsets. int Offset = getMemoryOpOffset(*Op); unsigned Bytes = getLSMultipleTransferSize(Op); if (LastBytes) { if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) break; } + + // Don't try to reschedule too many instructions. + if (NumMove == 8) // FIXME: Tune this limit. + break; + + // Found a mergable instruction; save information about it. + ++NumMove; LastOffset = Offset; LastBytes = Bytes; LastOpcode = LSMOpcode; - if (++NumMove == 8) // FIXME: Tune this limit. - break; + + unsigned Loc = MI2LocMap[Op]; + if (Loc <= FirstLoc) { + FirstLoc = Loc; + FirstOp = Op; + } + if (Loc >= LastLoc) { + LastLoc = Loc; + LastOp = Op; + } } if (NumMove <= 1) diff --git a/test/CodeGen/ARM/ldm-stm-i256.ll b/test/CodeGen/ARM/ldm-stm-i256.ll new file mode 100644 index 00000000000..7b4151dabf6 --- /dev/null +++ b/test/CodeGen/ARM/ldm-stm-i256.ll @@ -0,0 +1,38 @@ +; RUN: llc -mtriple=armv7--eabi -verify-machineinstrs < %s | FileCheck %s + +; Check the way we schedule/merge a bunch of loads and stores. +; Originally test/CodeGen/ARM/2011-07-07-ScheduleDAGCrash.ll ; now +; being used as a test of optimizations related to ldm/stm. + +; FIXME: We could merge more loads/stores with regalloc hints. +; FIXME: Fix scheduling so we don't have 16 live registers. + +define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp { +entry: + %c = load i256, i256* %cc + %d = load i256, i256* %dd + %add = add nsw i256 %c, %d + store i256 %add, i256* %a, align 8 + %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376 + %add6 = add nsw i256 %or, %d + store i256 %add6, i256* %b, align 8 + ret void + ; CHECK-DAG: ldm r3 + ; CHECK-DAG: ldm r2 + ; CHECK-DAG: ldr {{.*}}, [r3, #20] + ; CHECK-DAG: ldr {{.*}}, [r3, #16] + ; CHECK-DAG: ldr {{.*}}, [r3, #28] + ; CHECK-DAG: ldr {{.*}}, [r3, #24] + ; CHECK-DAG: ldr {{.*}}, [r2, #20] + ; CHECK-DAG: ldr {{.*}}, [r2, #16] + ; CHECK-DAG: ldr {{.*}}, [r2, #28] + ; CHECK-DAG: ldr {{.*}}, [r2, #24] + ; CHECK-DAG: stmib r0 + ; CHECK-DAG: str {{.*}}, [r0] + ; CHECK-DAG: str {{.*}}, [r0, #24] + ; CHECK-DAG: str {{.*}}, [r0, #28] + ; CHECK-DAG: str {{.*}}, [r1] + ; CHECK-DAG: stmib r1 + ; CHECK-DAG: str {{.*}}, [r1, #24] + ; CHECK-DAG: str {{.*}}, [r1, #28] +} diff --git a/test/CodeGen/ARM/prera-ldst-insertpt.mir b/test/CodeGen/ARM/prera-ldst-insertpt.mir index 1db38c8b004..eafcc7c36d3 100644 --- a/test/CodeGen/ARM/prera-ldst-insertpt.mir +++ b/test/CodeGen/ARM/prera-ldst-insertpt.mir @@ -36,6 +36,14 @@ body: | t2STRi12 %1, %0, 0, 14, _ :: (store 4) %10 : rgpr = t2LSLri %2, 1, 14, _, _ t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other, and + ; insert them in an appropriate location. + ; CHECK: t2STRi12 %1, + ; CHECK-NEXT: t2STRi12 killed %10, + ; CHECK-NEXT: t2MOVi + ; CHECK-NEXT: t2ADDrs + %11 : rgpr = t2MOVi 55, 14, _, _ %12 : gprnopc = t2ADDrs %11, killed %7, 19, 14, _, _ t2STRi12 killed %12, %0, 16, 14, _ :: (store 4) @@ -43,12 +51,7 @@ body: | t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) ; Make sure we move the paired stores next to each other. - ; FIXME: Make sure we don't extend the live-range of a store - ; when we don't need to. - ; CHECK: t2STRi12 %1, - ; CHECK-NEXT: t2STRi12 killed %10, - ; CHECK-NEXT: %13 = t2ADDrs %11 - ; CHECK-NEXT: t2STRi12 killed %12, + ; CHECK: t2STRi12 killed %12, ; CHECK-NEXT: t2STRi12 killed %13, tBX_RET 14, _ @@ -73,6 +76,15 @@ body: | t2STRi12 killed %10, %0, 4, 14, _ :: (store 4) %3 : rgpr = t2MUL %2, %2, 14, _ t2STRi12 %3, %0, 8, 14, _ :: (store 4) + + ; Make sure we move the paired stores next to each other, and + ; insert them in an appropriate location. + ; CHECK: t2STRi12 {{.*}}, 0 + ; CHECK-NEXT: t2STRi12 {{.*}}, 4 + ; CHECK-NEXT: t2STRi12 {{.*}}, 8 + ; CHECK-NEXT: t2MUL + ; CHECK-NEXT: t2MOVi32imm + %4 : rgpr = t2MUL %1, %1, 14, _ %5 : rgpr = t2MOVi32imm -858993459 %6 : rgpr, %7 : rgpr = t2UMULL killed %3, %5, 14, _ @@ -85,13 +97,7 @@ body: | t2STRi12 killed %13, %0, 20, 14, _ :: (store 4) ; Make sure we move the paired stores next to each other. - ; FIXME: Make sure we don't extend the live-range of a store - ; when we don't need to. - ; CHECK: t2STRi12 {{.*}}, 0 - ; CHECK-NEXT: t2STRi12 {{.*}}, 4 - ; CHECK-NEXT: t2STRi12 {{.*}}, 8 - ; CHECK-NEXT: t2ADDrs - ; CHECK-NEXT: t2STRi12 {{.*}}, 16 + ; CHECK: t2STRi12 {{.*}}, 16 ; CHECK-NEXT: t2STRi12 {{.*}}, 20 tBX_RET 14, _ -- 2.40.0