return true;
}
-static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
- for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
- MBB->addLiveIn(*I);
-}
-
bool AArch64ExpandPseudo::expandCMP_SWAP(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
- MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Dest = MI.getOperand(0);
unsigned StatusReg = MI.getOperand(1).getReg();
- MachineOperand &Addr = MI.getOperand(2);
- MachineOperand &Desired = MI.getOperand(3);
- MachineOperand &New = MI.getOperand(4);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(1).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(2).getReg();
+ unsigned DesiredReg = MI.getOperand(3).getReg();
+ unsigned NewReg = MI.getOperand(4).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
MF->insert(++StoreBB->getIterator(), DoneBB);
// .Lloadcmp:
+ // mov wStatus, 0
// ldaxr xDest, [xAddr]
// cmp xDest, xDesired
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(Dest.getReg());
- LoadCmpBB->addLiveIn(Desired.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
+ if (!StatusDead)
+ BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
+ .addImm(0).addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
- .add(Desired)
+ .addReg(DesiredReg)
.addImm(ExtendImm);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
.addImm(AArch64CC::NE)
// .Lstore:
// stlxr wStatus, xNew, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(New.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
-
- BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr);
+ BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
+ .addReg(NewReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute livein lists.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass around the loop to get loop carried registers right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
MachineOperand &DestLo = MI.getOperand(0);
MachineOperand &DestHi = MI.getOperand(1);
unsigned StatusReg = MI.getOperand(2).getReg();
- MachineOperand &Addr = MI.getOperand(3);
- MachineOperand &DesiredLo = MI.getOperand(4);
- MachineOperand &DesiredHi = MI.getOperand(5);
- MachineOperand &NewLo = MI.getOperand(6);
- MachineOperand &NewHi = MI.getOperand(7);
-
- LivePhysRegs LiveRegs(TII->getRegisterInfo());
- LiveRegs.addLiveOuts(MBB);
- for (auto I = std::prev(MBB.end()); I != MBBI; --I)
- LiveRegs.stepBackward(*I);
+ bool StatusDead = MI.getOperand(2).isDead();
+ // Duplicating undef operands into 2 instructions does not guarantee the same
+ // value on both; however, undef should be replaced by xzr anyway.
+ assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
+ unsigned AddrReg = MI.getOperand(3).getReg();
+ unsigned DesiredLoReg = MI.getOperand(4).getReg();
+ unsigned DesiredHiReg = MI.getOperand(5).getReg();
+ unsigned NewLoReg = MI.getOperand(6).getReg();
+ unsigned NewHiReg = MI.getOperand(7).getReg();
MachineFunction *MF = MBB.getParent();
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
// cmp xDestLo, xDesiredLo
// sbcs xDestHi, xDesiredHi
// b.ne .Ldone
- LoadCmpBB->addLiveIn(Addr.getReg());
- LoadCmpBB->addLiveIn(DestLo.getReg());
- LoadCmpBB->addLiveIn(DestHi.getReg());
- LoadCmpBB->addLiveIn(DesiredLo.getReg());
- LoadCmpBB->addLiveIn(DesiredHi.getReg());
- addPostLoopLiveIns(LoadCmpBB, LiveRegs);
-
BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
.addReg(DestLo.getReg(), RegState::Define)
.addReg(DestHi.getReg(), RegState::Define)
- .addReg(Addr.getReg());
+ .addReg(AddrReg);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
- .add(DesiredLo)
+ .addReg(DesiredLoReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(AArch64::WZR)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
- .add(DesiredHi)
+ .addReg(DesiredHiReg)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
.addUse(StatusReg, RegState::Kill)
.addUse(StatusReg, RegState::Kill)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
- .addUse(StatusReg, RegState::Kill)
+ .addUse(StatusReg, getKillRegState(StatusDead))
.addMBB(DoneBB);
LoadCmpBB->addSuccessor(DoneBB);
LoadCmpBB->addSuccessor(StoreBB);
// .Lstore:
// stlxp wStatus, xNewLo, xNewHi, [xAddr]
// cbnz wStatus, .Lloadcmp
- StoreBB->addLiveIn(Addr.getReg());
- StoreBB->addLiveIn(NewLo.getReg());
- StoreBB->addLiveIn(NewHi.getReg());
- addPostLoopLiveIns(StoreBB, LiveRegs);
BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
- .add(NewLo)
- .add(NewHi)
- .add(Addr);
+ .addReg(NewLoReg)
+ .addReg(NewHiReg)
+ .addReg(AddrReg);
BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addReg(StatusReg, getKillRegState(StatusDead))
.addMBB(LoadCmpBB);
StoreBB->addSuccessor(LoadCmpBB);
StoreBB->addSuccessor(DoneBB);
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
DoneBB->transferSuccessors(&MBB);
- addPostLoopLiveIns(DoneBB, LiveRegs);
MBB.addSuccessor(LoadCmpBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
+
+ // Recompute liveness bottom up.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ LivePhysRegs LiveRegs;
+ computeLiveIns(LiveRegs, MRI, *DoneBB);
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+ // Do an extra pass in the loop to get the loop carried dependencies right.
+ StoreBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *StoreBB);
+ LoadCmpBB->clearLiveIns();
+ computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
+
return true;
}
define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_8:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxtb
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: stlxrb [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_16:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1, uxth
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_32:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK: cmp [[OLD]], w1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: stlxr [[STATUS]], w2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
; CHECK-LABEL: test_cmpxchg_64:
; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: mov [[STATUS:w[3-9]+]], #0
; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK: cmp [[OLD]], x1
; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
-; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0]
+; CHECK: stlxr [[STATUS]], x2, [x0]
; CHECK: cbnz [[STATUS]], [[RETRY]]
; CHECK: [[DONE]]:
; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
; CHECK-LABEL: cmpxchg_monotonic_32:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, [x0]
+; CHECK-NEXT: stlxr [[STATUS]], w2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK: // BB#0:
; CHECK: ldr [[NEW:w[0-9]+]], [x2]
; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], [x0]
+; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], w1
; CHECK-LABEL: cmpxchg_seq_cst_64:
; CHECK: [[RETRY:.LBB[0-9_]+]]:
+; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0
; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0]
; CHECK-NEXT: cmp [[OLD]], x1
; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]]
; CHECK-NEXT: // BB#2:
-; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], x2, [x0]
+; CHECK-NEXT: stlxr [[STATUS]], x2, [x0]
; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]]
; CHECK-NEXT: [[DONE]]:
; CHECK-NEXT: cmp [[OLD]], x1