void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
- void UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base, unsigned WordOffset,
- ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}
-static unsigned getImmScale(unsigned Opc) {
- switch (Opc) {
- default: llvm_unreachable("Unhandled opcode!");
- case ARM::tLDRi:
- case ARM::tSTRi:
- return 1;
- case ARM::tLDRHi:
- case ARM::tSTRHi:
- return 2;
- case ARM::tLDRBi:
- case ARM::tSTRBi:
- return 4;
- }
-}
-
-/// Update future uses of the base register with the offset introduced
-/// due to writeback. This function only works on Thumb1.
-void
-ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc dl, unsigned Base,
- unsigned WordOffset,
- ARMCC::CondCodes Pred, unsigned PredReg) {
- assert(isThumb1 && "Can only update base register uses for Thumb1!");
-
- // Start updating any instructions with immediate offsets. Insert a sub before
- // the first non-updateable instruction (if any).
- for (; MBBI != MBB.end(); ++MBBI) {
- if (MBBI->readsRegister(Base)) {
- unsigned Opc = MBBI->getOpcode();
- int Offset;
- bool InsertSub = false;
-
- if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
- Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
- Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
- // Loads and stores with immediate offsets can be updated, but only if
- // the new offset isn't negative.
- // The MachineOperand containing the offset immediate is the last one
- // before predicates.
- MachineOperand &MO =
- MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
- // The offsets are scaled by 1, 2 or 4 depending on the Opcode
- Offset = MO.getImm() - WordOffset * getImmScale(Opc);
- if (Offset >= 0)
- MO.setImm(Offset);
- else
- InsertSub = true;
-
- } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
- // SUB/ADD using this register. Merge it with the update.
- // If the merged offset is too large, insert a new sub instead.
- MachineOperand &MO =
- MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
- Offset = (Opc == ARM::tSUBi8) ?
- MO.getImm() + WordOffset * 4 :
- MO.getImm() - WordOffset * 4 ;
- if (TL->isLegalAddImmediate(Offset)) {
- MO.setImm(Offset);
- // The base register has now been reset, so exit early.
- return;
- } else {
- InsertSub = true;
- }
-
- } else {
- // Can't update the instruction.
- InsertSub = true;
- }
-
- if (InsertSub) {
- // An instruction above couldn't be updated, so insert a sub.
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
- return;
- }
- }
-
- if (MBBI->killsRegister(Base))
- // Register got killed. Stop updating.
- return;
- }
-
- // The end of the block was reached. This means register liveness escapes the
- // block, and it's necessary to insert a sub before the last instruction.
- if (MBB.succ_size() > 0)
- // But only insert the SUB if there is actually a successor block.
- // FIXME: Check more carefully if register is live at this point, e.g. by
- // also examining the successor block's register liveness information.
- AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
- .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
- .addImm(Pred).addReg(PredReg);
-}
-
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
break;
}
+ // If the merged instruction has writeback and the base register is not killed
+ // it's not safe to do the merge on Thumb1. This is because resetting the base
+ // register writeback by inserting a SUBS sets the condition flags.
+ // FIXME: Try something clever here to see if resetting the base register can
+ // be avoided, e.g. by updating a later ADD/SUB of the base register with the
+ // writeback.
+ if (isThumb1 && Writeback && !BaseKill) return false;
+
MachineInstrBuilder MIB;
if (Writeback) {
// Thumb1: we might need to set base writeback when building the MI.
MIB.addReg(Base, getDefRegState(true))
.addReg(Base, getKillRegState(BaseKill));
-
- // The base isn't dead after a merged instruction with writeback. Update
- // future uses of the base with the added offset (if possible), or reset
- // the base register as necessary.
- if (!BaseKill)
- UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
} else {
// No writeback, simply build the MachineInstr.
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
isThumb2 = AFI->isThumb2Function();
isThumb1 = AFI->isThumbFunction() && !isThumb2;
- // FIXME: Temporarily disabling for Thumb-1 due to miscompiles
- if (isThumb1) {
- delete RS;
- return false;
- }
-
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
-; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
-; XFAIL: *
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s
define void @foo(i32* %A) #0 {
entry:
; CHECK-LABEL: foo:
; CHECK: push {r7, lr}
-; CHECK: ldm [[REG0:r[0-9]]]!,
-; CHECK-NEXT: subs [[REG0]]
+; CHECK: ldr
+; CHECK-NEXT: ldr
; CHECK-NEXT: bl
%0 = load i32* %A, align 4
%arrayidx1 = getelementptr inbounds i32* %A, i32 1
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
--- /dev/null
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m--linux-gnueabi"
+
+; Function Attrs: nounwind optsize
+define void @foo(i32* nocapture readonly %A) #0 {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: ldm r[[BASE:[0-9]]]!,
+; CHECK-NEXT: mov r[[BASE]],
+ %0 = load i32* %A, align 4
+ %arrayidx1 = getelementptr inbounds i32* %A, i32 1
+ %1 = load i32* %arrayidx1, align 4
+ %call = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
+ %call2 = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
+ ret void
+}
+
+; Function Attrs: optsize
+declare i32 @bar(i32, i32, i32, i32) #1
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
--- /dev/null
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--eabi"
+
+%struct.S = type { i32, i32 }
+
+@s = common global %struct.S zeroinitializer, align 4
+
+define i32 @f() {
+entry:
+; CHECK-LABEL: f:
+; CHECK: ldm r[[BASE:[0-9]]],
+; CHECK-NEXT-NOT: subs r[[BASE]]
+ %0 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4
+ %1 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4
+ %cmp = icmp sgt i32 %0, %1
+ %2 = sub i32 0, %1
+ %cond.p = select i1 %cmp, i32 %1, i32 %2
+ %cond = add i32 %cond.p, %0
+ ret i32 %cond
+}
--- /dev/null
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m--linux-gnueabi"
+
+@d = internal unnamed_addr global i32 0, align 4
+@c = internal global i32* null, align 4
+@e = internal unnamed_addr global i32* null, align 4
+
+; Function Attrs: nounwind optsize
+define void @fn1() #0 {
+entry:
+; CHECK-LABEL: fn1:
+; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
+; CHECK-NOT: {{.*}} r[[BASE]]
+; CHECK: ldr r[[BASE]], {{.*}}
+ %g = alloca i32, align 4
+ %h = alloca i32, align 4
+ store i32 1, i32* %g, align 4
+ store i32 0, i32* %h, align 4
+ %.pr = load i32* @d, align 4
+ %cmp11 = icmp slt i32 %.pr, 1
+ br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
+
+for.inc.lr.ph: ; preds = %entry
+ store i32 1, i32* @d, align 4
+ br label %for.body5
+
+for.body5: ; preds = %entry, %for.inc.lr.ph, %for.body5
+ %f.010 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.inc.lr.ph ], [ 0, %entry ]
+ store volatile i32* %g, i32** @c, align 4
+ %inc7 = add nsw i32 %f.010, 1
+ %exitcond = icmp eq i32 %inc7, 2
+ br i1 %exitcond, label %for.end8, label %for.body5
+
+for.end8: ; preds = %for.body5
+ store i32* %h, i32** @e, align 4
+ ret void
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
-; XFAIL: *
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s
-; XFAIL: *
@d = external global [64 x i32]
@s = external global [64 x i32]
; Function Attrs: nounwind
define void @t1() #0 {
entry:
-; CHECK: ldr [[REG0:r[0-9]]],
-; CHECK: ldm [[REG0]]!,
-; CHECK: ldr [[REG1:r[0-9]]],
-; CHECK: stm [[REG1]]!,
-; CHECK: subs [[REG0]], #32
-; CHECK-NEXT: ldrb
-; CHECK: subs [[REG1]], #32
-; CHECK-NEXT: strb
- tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 33, i32 4, i1 false)
+; CHECK-LABEL: t1
+; CHECK-NOT: ldm
+; CHECK-NOT: stm
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
ret void
}
; Function Attrs: nounwind
define void @t2() #0 {
entry:
-; CHECK: ldr [[REG0:r[0-9]]],
-; CHECK: ldm [[REG0]]!,
-; CHECK: ldr [[REG1:r[0-9]]],
-; CHECK: stm [[REG1]]!,
-; CHECK: ldrh
-; CHECK: ldrb
-; CHECK: strb
-; CHECK: strh
+; CHECK-LABEL: t2:
+; CHECK-NOT: ldm
+; CHECK-NOT: stm
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
ret void
}