Recommit "[mips] Fix atomic compare and swap at O0."

author Simon Dardis <simon.dardis@imgtec.com>

Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)

committer Simon Dardis <simon.dardis@imgtec.com>

Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)
author Simon Dardis <simon.dardis@imgtec.com>
Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)
committer Simon Dardis <simon.dardis@imgtec.com>
Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt

index 3650cc9fe07286a461849a189859a4e3d7f0399d..ea7218229a0a726263b4be3d7944725bb8f8a40d 100644 (file)
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_target(MipsCodeGen
    MipsCCState.cpp
    MipsConstantIslandPass.cpp
    MipsDelaySlotFiller.cpp
+  MipsExpandPseudo.cpp
    MipsFastISel.cpp
    MipsHazardSchedule.cpp
    MipsInstrInfo.cpp
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h

index d9faf3325cacd13d6dbc6c202f8d97d532312017..55cda18a608f3257357fa0f9ecd88a8c16c8db66 100644 (file)
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -32,6 +32,7 @@ namespace llvm {
    FunctionPass *createMipsHazardSchedule();
    FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
    FunctionPass *createMipsConstantIslandPass();
+  FunctionPass *createMipsExpandPseudoPass();
  } // end namespace llvm;
  
  #endif
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td

index 87ab7920ede6d489e496199849711bc555f35254..b8396c1f5995916b70247d2a2ecb5b1cff7949b3 100644 (file)
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -73,6 +73,9 @@ let usesCustomInserter = 1 in {
    def ATOMIC_LOAD_XOR_I64  : Atomic2Ops<atomic_load_xor_64, GPR64>;
    def ATOMIC_LOAD_NAND_I64 : Atomic2Ops<atomic_load_nand_64, GPR64>;
    def ATOMIC_SWAP_I64      : Atomic2Ops<atomic_swap_64, GPR64>;
+}
+
+let isPseudo = 1 in {
    def ATOMIC_CMP_SWAP_I64  : AtomicCmpSwap<atomic_cmp_swap_64, GPR64>;
  }
  
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp

new file mode 100644 (file)

index 0000000..66fceec
--- /dev/null
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -0,0 +1,341 @@
+//===-- MipsExpandPseudo.cpp - Expand pseudo instructions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mips-pseudo"
+
+namespace {
+  /// Machine function pass that rewrites the MIPS atomic compare-and-swap
+  /// pseudo instructions into explicit ll/sc loops. It declares NoVRegs as a
+  /// required property, i.e. it operates on physical registers only.
+  class MipsExpandPseudo : public MachineFunctionPass {
+  public:
+    static char ID;
+    MipsExpandPseudo() : MachineFunctionPass(ID) {}
+
+    // Both pointers are (re)initialised at the start of runOnMachineFunction
+    // from the subtarget of the function being processed.
+    const MipsInstrInfo *TII;
+    const MipsSubtarget *STI;
+
+    /// Expand the handled pseudos in every block of \p Fn; returns true if
+    /// anything was changed.
+    bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    /// This pass requires that no virtual registers remain.
+    MachineFunctionProperties getRequiredProperties() const override {
+      return MachineFunctionProperties().set(
+          MachineFunctionProperties::Property::NoVRegs);
+    }
+
+    StringRef getPassName() const override {
+      return "Mips pseudo instruction expansion pass";
+    }
+
+  private:
+    /// Expand ATOMIC_CMP_SWAP_I32 / ATOMIC_CMP_SWAP_I64.
+    bool expandAtomicCmpSwap(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MBBI,
+                             MachineBasicBlock::iterator &NextMBBI);
+    /// Expand ATOMIC_CMP_SWAP_I8_FRAG / ATOMIC_CMP_SWAP_I16_FRAG.
+    bool expandAtomicCmpSwapSubword(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    MachineBasicBlock::iterator &NextMBBI);
+    /// Expand the single instruction at MBBI if it is a handled pseudo. NMBB
+    /// is the iterator the caller resumes from and is updated by the
+    /// expansion routines.
+    bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                  MachineBasicBlock::iterator &NMBB);
+    /// Run expandMI over every instruction of MBB.
+    bool expandMBB(MachineBasicBlock &MBB);
+   };
+  char MipsExpandPseudo::ID = 0;
+}
+
+/// Mark every register contained in \p LiveRegs as a live-in of \p MBB.
+static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
+  for (auto Reg : LiveRegs)
+    MBB->addLiveIn(Reg);
+}
+
+/// Expand an ATOMIC_CMP_SWAP_I8_FRAG / ATOMIC_CMP_SWAP_I16_FRAG pseudo into an
+/// ll/sc retry loop followed by a shift and sign extension of the loaded value.
+///
+/// The pseudo's operands are read as (dest, ptr, mask, shifted compare value,
+/// mask2, shifted new value, shift amount). \p BB is split after \p I: the
+/// tail of the block and its successor edges move to a new exit block, and
+/// loop1MBB, loop2MBB and sinkMBB are inserted between the two.
+///
+/// \param BB    Block containing the pseudo.
+/// \param I     The pseudo to expand; it is erased before returning.
+/// \param NMBBI Set to BB.end(), since nothing is left in \p BB after \p I.
+/// \returns true; the block is always modified.
+bool MipsExpandPseudo::expandAtomicCmpSwapSubword(
+    MachineBasicBlock &BB, MachineBasicBlock::iterator I,
+    MachineBasicBlock::iterator &NMBBI) {
+
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+  unsigned LL, SC;
+
+  unsigned ZERO = Mips::ZERO;
+  unsigned BNE = Mips::BNE;
+  unsigned BEQ = Mips::BEQ;
+  unsigned SEOp =
+      I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_FRAG ? Mips::SEB : Mips::SEH;
+
+  // Pick the ll/sc opcodes matching the ISA mode and pointer width.
+  if (STI->inMicroMipsMode()) {
+    LL = Mips::LL_MM;
+    SC = Mips::SC_MM;
+  } else {
+    LL = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+                            : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+    SC = STI->hasMips32r6() ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+                            : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned Mask = I->getOperand(2).getReg();
+  unsigned ShiftCmpVal = I->getOperand(3).getReg();
+  unsigned Mask2 = I->getOperand(4).getReg();
+  unsigned ShiftNewVal = I->getOperand(5).getReg();
+  unsigned ShiftAmnt = I->getOperand(6).getReg();
+
+  // Compute the physical registers live immediately after the pseudo. The
+  // backward walk has to be seeded with the block's live-outs (as
+  // expandAtomicCmpSwap does); starting from an empty set would drop registers
+  // that are live through the tail of the block into a successor.
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(BB);
+  for (auto MBBI = std::prev(BB.end()); MBBI != I; --MBBI)
+    LiveRegs.stepBackward(*MBBI);
+
+  // insert new blocks after the current block
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, sinkMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+
+  //  thisMBB:
+  //    ...
+  //    fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(sinkMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(sinkMBB);
+  sinkMBB->addSuccessor(exitMBB, BranchProbability::getOne());
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   and Mask', dest, Mask
+  //   bne Mask', ShiftCmpVal, sinkMBB
+  // (Mask' is written back into the Mask register.)
+  BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(Mips::AND), Mask)
+      .addReg(Dest)
+      .addReg(Mask);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+    .addReg(Mask).addReg(ShiftCmpVal).addMBB(sinkMBB);
+  loop1MBB->addLiveIn(Ptr);
+  loop1MBB->addLiveIn(Mask);
+  loop1MBB->addLiveIn(ShiftCmpVal);
+
+  // loop2MBB:
+  //   and dest, dest, mask2
+  //   or dest, dest, ShiftNewVal
+  //   sc dest, dest, 0(ptr)
+  //   beq dest, $0, loop1MBB
+  BuildMI(loop2MBB, DL, TII->get(Mips::AND), Dest)
+      .addReg(Dest, RegState::Kill)
+      .addReg(Mask2);
+  BuildMI(loop2MBB, DL, TII->get(Mips::OR), Dest)
+      .addReg(Dest, RegState::Kill)
+      .addReg(ShiftNewVal);
+  BuildMI(loop2MBB, DL, TII->get(SC), Dest)
+      .addReg(Dest, RegState::Kill)
+      .addReg(Ptr)
+      .addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+      .addReg(Dest, RegState::Kill)
+      .addReg(ZERO)
+      .addMBB(loop1MBB);
+  loop2MBB->addLiveIn(Ptr);
+  loop2MBB->addLiveIn(Mask2);
+  loop2MBB->addLiveIn(Dest);
+  loop2MBB->addLiveIn(ShiftNewVal);
+
+  //  sinkMBB:
+  //    srl     srlres, Mask', shiftamt
+  //    sign_extend dest,srlres
+  BuildMI(sinkMBB, DL, TII->get(Mips::SRLV), Dest)
+      .addReg(Mask)
+      .addReg(ShiftAmnt);
+  if (STI->hasMips32r2()) {
+    // seb/seh are used when the subtarget reports MIPS32r2 support.
+    BuildMI(sinkMBB, DL, TII->get(SEOp), Dest).addReg(Dest);
+  } else {
+    // Otherwise sign extend with a shift-left / arithmetic-shift-right pair.
+    const unsigned ShiftImm =
+        I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I16_FRAG ? 16 : 24;
+    BuildMI(sinkMBB, DL, TII->get(Mips::SLL), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+    BuildMI(sinkMBB, DL, TII->get(Mips::SRA), Dest)
+        .addReg(Dest, RegState::Kill)
+        .addImm(ShiftImm);
+  }
+  sinkMBB->addLiveIn(Mask);
+  sinkMBB->addLiveIn(ShiftAmnt);
+
+  // Everything live after the pseudo, plus its result, is live into the exit.
+  addPostLoopLiveIns(exitMBB, LiveRegs);
+  exitMBB->addLiveIn(Dest);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
+/// Expand an ATOMIC_CMP_SWAP_I32 / ATOMIC_CMP_SWAP_I64 pseudo into an ll/sc
+/// retry loop.
+///
+/// The pseudo's operands are read as (dest, ptr, oldval, newval). \p BB is
+/// split after \p I: the tail of the block and its successor edges move to a
+/// new exit block, with loop1MBB and loop2MBB inserted between the two.
+///
+/// \param BB    Block containing the pseudo.
+/// \param I     The pseudo to expand; it is erased before returning.
+/// \param NMBBI Set to BB.end(), since nothing is left in \p BB after \p I.
+/// \returns true; the block is always modified.
+bool MipsExpandPseudo::expandAtomicCmpSwap(MachineBasicBlock &BB,
+                                           MachineBasicBlock::iterator I,
+                                           MachineBasicBlock::iterator &NMBBI) {
+
+  // Any opcode other than the I32 variant is treated as the 8-byte form.
+  const unsigned Size = I->getOpcode() == Mips::ATOMIC_CMP_SWAP_I32 ? 4 : 8;
+  MachineFunction *MF = BB.getParent();
+
+  const bool ArePtrs64bit = STI->getABI().ArePtrs64bit();
+  DebugLoc DL = I->getDebugLoc();
+
+  // Compute the physical registers live immediately after the pseudo by
+  // starting from the block's live-outs and stepping backward over the tail.
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(BB);
+  for (auto MBBI = std::prev(BB.end()); MBBI != I; --MBBI)
+    LiveRegs.stepBackward(*MBBI);
+
+  unsigned LL, SC, ZERO, BNE, BEQ;
+
+  // Select opcodes and the zero register for the access size, ISA mode and
+  // pointer width.
+  if (Size == 4) {
+    if (STI->inMicroMipsMode()) {
+      LL = Mips::LL_MM;
+      SC = Mips::SC_MM;
+    } else {
+      LL = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
+               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
+      SC = STI->hasMips32r6()
+               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
+               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
+    }
+
+    ZERO = Mips::ZERO;
+    BNE = Mips::BNE;
+    BEQ = Mips::BEQ;
+  } else {
+    LL = STI->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
+    SC = STI->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
+    ZERO = Mips::ZERO_64;
+    BNE = Mips::BNE64;
+    BEQ = Mips::BEQ64;
+  }
+
+  unsigned Dest = I->getOperand(0).getReg();
+  unsigned Ptr = I->getOperand(1).getReg();
+  unsigned OldVal = I->getOperand(2).getReg();
+  unsigned NewVal = I->getOperand(3).getReg();
+
+  // insert new blocks after the current block
+  const BasicBlock *LLVM_BB = BB.getBasicBlock();
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineFunction::iterator It = ++BB.getIterator();
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), &BB,
+                  std::next(MachineBasicBlock::iterator(I)), BB.end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(&BB);
+
+  //  thisMBB:
+  //    ...
+  //    fallthrough --> loop1MBB
+  BB.addSuccessor(loop1MBB, BranchProbability::getOne());
+  loop1MBB->addSuccessor(exitMBB);
+  loop1MBB->addSuccessor(loop2MBB);
+  loop2MBB->addSuccessor(loop1MBB);
+  loop2MBB->addSuccessor(exitMBB);
+
+  // loop1MBB:
+  //   ll dest, 0(ptr)
+  //   bne dest, oldval, exitMBB
+  BuildMI(loop1MBB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+  BuildMI(loop1MBB, DL, TII->get(BNE))
+    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
+  loop1MBB->addLiveIn(Ptr);
+  loop1MBB->addLiveIn(OldVal);
+
+  // loop2MBB:
+  //   sc newval, newval, 0(ptr)
+  //   beq newval, $0, loop1MBB
+  // Note that the sc result is written into the NewVal register itself; no
+  // separate "success" register is used here.
+  BuildMI(loop2MBB, DL, TII->get(SC), NewVal)
+    .addReg(NewVal).addReg(Ptr).addImm(0);
+  BuildMI(loop2MBB, DL, TII->get(BEQ))
+    .addReg(NewVal, RegState::Kill).addReg(ZERO).addMBB(loop1MBB);
+  loop2MBB->addLiveIn(Ptr);
+  loop2MBB->addLiveIn(NewVal);
+
+  // Registers live after the pseudo are live into the exit block.
+  addPostLoopLiveIns(exitMBB, LiveRegs);
+
+  NMBBI = BB.end();
+  I->eraseFromParent();
+  return true;
+}
+
+/// Expand the instruction at \p MBBI if it is one of the atomic compare-and-
+/// swap pseudos handled by this pass. \p NMBB is forwarded to the expansion
+/// routine, which updates it. Returns false, leaving the instruction
+/// untouched, for every other opcode.
+bool MipsExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                MachineBasicBlock::iterator MBBI,
+                                MachineBasicBlock::iterator &NMBB) {
+  const unsigned Opcode = MBBI->getOpcode();
+
+  // Word and doubleword compare-and-swap.
+  if (Opcode == Mips::ATOMIC_CMP_SWAP_I32 ||
+      Opcode == Mips::ATOMIC_CMP_SWAP_I64)
+    return expandAtomicCmpSwap(MBB, MBBI, NMBB);
+
+  // Byte and halfword compare-and-swap fragments.
+  if (Opcode == Mips::ATOMIC_CMP_SWAP_I8_FRAG ||
+      Opcode == Mips::ATOMIC_CMP_SWAP_I16_FRAG)
+    return expandAtomicCmpSwapSubword(MBB, MBBI, NMBB);
+
+  // Not a pseudo this pass knows about.
+  return false;
+}
+
+/// Visit every instruction of \p MBB in order and try to expand it. The
+/// successor iterator is computed before each expansion and may be redirected
+/// by it. Returns true if any instruction was expanded.
+bool MipsExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Changed = false;
+
+  const MachineBasicBlock::iterator End = MBB.end();
+  for (MachineBasicBlock::iterator Cur = MBB.begin(); Cur != End;) {
+    MachineBasicBlock::iterator Next = std::next(Cur);
+    Changed |= expandMI(MBB, Cur, Next);
+    Cur = Next;
+  }
+
+  return Changed;
+}
+
+/// Pass entry point: cache the subtarget and instruction info for \p MF, then
+/// expand the handled pseudos in each of its basic blocks. Returns true if the
+/// function was changed.
+bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  STI = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+  TII = STI->getInstrInfo();
+
+  bool Changed = false;
+  for (MachineBasicBlock &Block : MF)
+    Changed |= expandMBB(Block);
+
+  return Changed;
+}
+
+/// Factory for the MIPS pseudo instruction expansion pass. Returns a newly
+/// allocated MipsExpandPseudo instance.
+FunctionPass *llvm::createMipsExpandPseudoPass() {
+  FunctionPass *Pass = new MipsExpandPseudo();
+  return Pass;
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp

index f0f2424f7224cd4e329fe49c7915816e88ec9b12..1e72f4e47ecde2ba04072277c04e6cecbc37c3a2 100644 (file)
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -1053,14 +1053,11 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
    case Mips::ATOMIC_SWAP_I64:
      return emitAtomicBinary(MI, BB, 8, 0);
  
-  case Mips::ATOMIC_CMP_SWAP_I8:
+  case Mips::ATOMIC_CMP_SWAP_I8_PSEUDO:
      return emitAtomicCmpSwapPartword(MI, BB, 1);
-  case Mips::ATOMIC_CMP_SWAP_I16:
+  case Mips::ATOMIC_CMP_SWAP_I16_PSEUDO:
      return emitAtomicCmpSwapPartword(MI, BB, 2);
-  case Mips::ATOMIC_CMP_SWAP_I32:
-    return emitAtomicCmpSwap(MI, BB, 4);
-  case Mips::ATOMIC_CMP_SWAP_I64:
-    return emitAtomicCmpSwap(MI, BB, 8);
+
    case Mips::PseudoSDIV:
    case Mips::PseudoUDIV:
    case Mips::DIV:
@@ -1407,96 +1404,6 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword(
    return exitMBB;
  }
  
-MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwap(MachineInstr &MI,
-                                                         MachineBasicBlock *BB,
-                                                         unsigned Size) const {
-  assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
-
-  MachineFunction *MF = BB->getParent();
-  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
-  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
-  const bool ArePtrs64bit = ABI.ArePtrs64bit();
-  DebugLoc DL = MI.getDebugLoc();
-  unsigned LL, SC, ZERO, BNE, BEQ;
-
-  if (Size == 4) {
-    if (isMicroMips) {
-      LL = Mips::LL_MM;
-      SC = Mips::SC_MM;
-    } else {
-      LL = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::LL64_R6 : Mips::LL_R6)
-               : (ArePtrs64bit ? Mips::LL64 : Mips::LL);
-      SC = Subtarget.hasMips32r6()
-               ? (ArePtrs64bit ? Mips::SC64_R6 : Mips::SC_R6)
-               : (ArePtrs64bit ? Mips::SC64 : Mips::SC);
-    }
-
-    ZERO = Mips::ZERO;
-    BNE = Mips::BNE;
-    BEQ = Mips::BEQ;
-  } else {
-    LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD;
-    SC = Subtarget.hasMips64r6() ? Mips::SCD_R6 : Mips::SCD;
-    ZERO = Mips::ZERO_64;
-    BNE = Mips::BNE64;
-    BEQ = Mips::BEQ64;
-  }
-
-  unsigned Dest = MI.getOperand(0).getReg();
-  unsigned Ptr = MI.getOperand(1).getReg();
-  unsigned OldVal = MI.getOperand(2).getReg();
-  unsigned NewVal = MI.getOperand(3).getReg();
-
-  unsigned Success = RegInfo.createVirtualRegister(RC);
-
-  // insert new blocks after the current block
-  const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, exitMBB);
-
-  // Transfer the remainder of BB and its successor edges to exitMBB.
-  exitMBB->splice(exitMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
-  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
-  //  thisMBB:
-  //    ...
-  //    fallthrough --> loop1MBB
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(exitMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(exitMBB);
-
-  // loop1MBB:
-  //   ll dest, 0(ptr)
-  //   bne dest, oldval, exitMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), Dest).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BNE))
-    .addReg(Dest).addReg(OldVal).addMBB(exitMBB);
-
-  // loop2MBB:
-  //   sc success, newval, 0(ptr)
-  //   beq success, $0, loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(SC), Success)
-    .addReg(NewVal).addReg(Ptr).addImm(0);
-  BuildMI(BB, DL, TII->get(BEQ))
-    .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
-
-  MI.eraseFromParent(); // The instruction is gone now.
-
-  return exitMBB;
-}
-
  MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
      MachineInstr &MI, MachineBasicBlock *BB, unsigned Size) const {
    assert((Size == 1 || Size == 2) &&
@@ -1521,18 +1428,15 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
    unsigned Mask = RegInfo.createVirtualRegister(RC);
    unsigned Mask2 = RegInfo.createVirtualRegister(RC);
    unsigned ShiftedCmpVal = RegInfo.createVirtualRegister(RC);
-  unsigned OldVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal0 = RegInfo.createVirtualRegister(RC);
    unsigned ShiftedNewVal = RegInfo.createVirtualRegister(RC);
    unsigned MaskLSB2 = RegInfo.createVirtualRegister(RCp);
    unsigned PtrLSB2 = RegInfo.createVirtualRegister(RC);
    unsigned MaskUpper = RegInfo.createVirtualRegister(RC);
    unsigned MaskedCmpVal = RegInfo.createVirtualRegister(RC);
    unsigned MaskedNewVal = RegInfo.createVirtualRegister(RC);
-  unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC);
-  unsigned StoreVal = RegInfo.createVirtualRegister(RC);
-  unsigned SrlRes = RegInfo.createVirtualRegister(RC);
-  unsigned Success = RegInfo.createVirtualRegister(RC);
+  unsigned AtomicOp = MI.getOpcode() == Mips::ATOMIC_CMP_SWAP_I8_PSEUDO
+                          ? Mips::ATOMIC_CMP_SWAP_I8_FRAG
+                          : Mips::ATOMIC_CMP_SWAP_I16_FRAG;
    unsigned LL, SC;
  
    if (isMicroMips) {
@@ -1547,14 +1451,8 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
  
    // insert new blocks after the current block
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
-  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    MachineFunction::iterator It = ++BB->getIterator();
-  MF->insert(It, loop1MBB);
-  MF->insert(It, loop2MBB);
-  MF->insert(It, sinkMBB);
    MF->insert(It, exitMBB);
  
    // Transfer the remainder of BB and its successor edges to exitMBB.
@@ -1562,12 +1460,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
  
-  BB->addSuccessor(loop1MBB);
-  loop1MBB->addSuccessor(sinkMBB);
-  loop1MBB->addSuccessor(loop2MBB);
-  loop2MBB->addSuccessor(loop1MBB);
-  loop2MBB->addSuccessor(sinkMBB);
-  sinkMBB->addSuccessor(exitMBB);
+  BB->addSuccessor(exitMBB);
  
    // FIXME: computation of newval2 can be moved to loop2MBB.
    //  thisMBB:
@@ -1612,40 +1505,31 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicCmpSwapPartword(
    BuildMI(BB, DL, TII->get(Mips::SLLV), ShiftedNewVal)
      .addReg(MaskedNewVal).addReg(ShiftAmt);
  
-  //  loop1MBB:
-  //    ll      oldval,0(alginedaddr)
-  //    and     maskedoldval0,oldval,mask
-  //    bne     maskedoldval0,shiftedcmpval,sinkMBB
-  BB = loop1MBB;
-  BuildMI(BB, DL, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal0)
-    .addReg(OldVal).addReg(Mask);
-  BuildMI(BB, DL, TII->get(Mips::BNE))
-    .addReg(MaskedOldVal0).addReg(ShiftedCmpVal).addMBB(sinkMBB);
-
-  //  loop2MBB:
-  //    and     maskedoldval1,oldval,mask2
-  //    or      storeval,maskedoldval1,shiftednewval
-  //    sc      success,storeval,0(alignedaddr)
-  //    beq     success,$0,loop1MBB
-  BB = loop2MBB;
-  BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1)
-    .addReg(OldVal).addReg(Mask2);
-  BuildMI(BB, DL, TII->get(Mips::OR), StoreVal)
-    .addReg(MaskedOldVal1).addReg(ShiftedNewVal);
-  BuildMI(BB, DL, TII->get(SC), Success)
-      .addReg(StoreVal).addReg(AlignedAddr).addImm(0);
-  BuildMI(BB, DL, TII->get(Mips::BEQ))
-      .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
-
-  //  sinkMBB:
-  //    srl     srlres,maskedoldval0,shiftamt
-  //    sign_extend dest,srlres
-  BB = sinkMBB;
-
-  BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes)
-      .addReg(MaskedOldVal0).addReg(ShiftAmt);
-  BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes);
+  // For correctness purposes, a new pseudo is introduced here. We need this
+  // new pseudo so that FastRegisterAllocator does not see an ll/sc sequence
+  // that is spread over >1 basic blocks. A register allocator (or in fact any
+  // codegen) which introduces a store can violate the expectations of the
+  // hardware.
+  //
+  // An atomic read-modify-write sequence starts with a linked load
+  // instruction and ends with a store conditional instruction. The atomic
+  // read-modify-write sequence fails if any of the following conditions
+  // occur between the execution of ll and sc:
+  //   * A coherent store is completed by another process or coherent I/O
+  //     module into the block of synchronizable physical memory containing
+  //     the word. The size and alignment of the block is
+  //     implementation-dependent.
+  //   * A coherent store is executed between an LL and SC sequence on the
+  //     same processor to the block of synchronizable physical memory
+  //     containing the word.
+  //
+  BuildMI(BB, DL, TII->get(AtomicOp), Dest)
+      .addReg(AlignedAddr)
+      .addReg(Mask)
+      .addReg(ShiftedCmpVal)
+      .addReg(Mask2)
+      .addReg(ShiftedNewVal)
+      .addReg(ShiftAmt);
  
    MI.eraseFromParent(); // The instruction is gone now.
  
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td

index d4b3052cc9363aa75407c837c9b47466aca86bfc..eeddea1ef66dfbf6c8efccfb4620b2e720cf2190 100644 (file)
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1666,6 +1666,10 @@ class AtomicCmpSwap<PatFrag Op, RegisterClass DRC> :
    PseudoSE<(outs DRC:$dst), (ins PtrRC:$ptr, DRC:$cmp, DRC:$swap),
             [(set DRC:$dst, (Op iPTR:$ptr, DRC:$cmp, DRC:$swap))]>;
  
+class AtomicCmpSwapSubword<RegisterClass RC> :
+  PseudoSE<(outs RC:$dst), (ins PtrRC:$ptr, RC:$mask, RC:$ShiftCmpVal,
+                                RC:$mask2, RC:$ShiftNewVal, RC:$ShiftAmt), []>;
+
  class LLBase<string opstr, RegisterOperand RO, DAGOperand MO = mem> :
    InstSE<(outs RO:$rt), (ins MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
           [], II_LL, FrmI, opstr> {
@@ -1744,11 +1748,21 @@ let usesCustomInserter = 1 in {
    def ATOMIC_SWAP_I16      : Atomic2Ops<atomic_swap_16, GPR32>;
    def ATOMIC_SWAP_I32      : Atomic2Ops<atomic_swap_32, GPR32>;
  
-  def ATOMIC_CMP_SWAP_I8   : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
-  def ATOMIC_CMP_SWAP_I16  : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
-  def ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+  def ATOMIC_CMP_SWAP_I8_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_8, GPR32>;
+  def ATOMIC_CMP_SWAP_I16_PSEUDO : AtomicCmpSwap<atomic_cmp_swap_16, GPR32>;
  }
  
+let isPseudo = 1 in {
+  // The expansion of ATOMIC_CMP_SWAP_I(8|16) occurs in two parts. First,
+  // the *_PSEUDO is partially lowered during ISelLowering to compute the
+  // aligned addresses and necessary masks, along with another pseudo which
+  // represents the ll/sc loop. That pseudo is lowered after the basic
+  // postRA pseudos have been lowered.
+  def ATOMIC_CMP_SWAP_I8_FRAG : AtomicCmpSwapSubword<GPR32>;
+  def ATOMIC_CMP_SWAP_I16_FRAG : AtomicCmpSwapSubword<GPR32>;
+
+  def ATOMIC_CMP_SWAP_I32  : AtomicCmpSwap<atomic_cmp_swap_32, GPR32>;
+}
  /// Pseudo instructions for loading and storing accumulator registers.
  let isPseudo = 1, isCodeGenOnly = 1, hasNoSchedulingInfo = 1 in {
    def LOAD_ACC64  : Load<"", ACC64>;
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp

index a45a9c4b41c37b62911fa64f1146884460f0820d..d08e3b9f941d26b5c51d871d0c1d6100f3554a0a 100644 (file)
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -213,6 +213,7 @@ public:
    bool addInstSelector() override;
    void addPreEmitPass() override;
    void addPreRegAlloc() override;
+  void addPreSched2() override;
  };
  
  } // end anonymous namespace
@@ -270,3 +271,7 @@ void MipsPassConfig::addPreEmitPass() {
    addPass(createMipsLongBranchPass(TM));
    addPass(createMipsConstantIslandPass());
  }
+
+void MipsPassConfig::addPreSched2() {
+  addPass(createMipsExpandPseudoPass());
+}
diff --git a/test/CodeGen/Mips/atomicCmpSwapPW.ll b/test/CodeGen/Mips/atomicCmpSwapPW.ll

index 981f0983fa4c9ce15c5c06c3c6e65cb18ea8b026..e64501d1fa8b0894fbe7ff0b01f521d5d08a379d 100644 (file)
--- a/test/CodeGen/Mips/atomicCmpSwapPW.ll
+++ b/test/CodeGen/Mips/atomicCmpSwapPW.ll
@@ -5,13 +5,21 @@
  ; RUN: llc -O0 -march=mips64el -mcpu=mips64r2 -target-abi=n64 < %s -filetype=asm -o - \
  ; RUN:   | FileCheck -check-prefixes=PTR64,ALL %s
  
+
+; ALL-LABEL: foo:
  ; PTR32: lw $[[R0:[0-9]+]]
+; PTR32: addiu $[[R1:[0-9]+]], $zero, -4
+; PTR32: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
+
  ; PTR64: ld $[[R0:[0-9]+]]
+; PTR64: daddiu $[[R1:[0-9]+]], $zero, -4
+; PTR64: and $[[R2:[0-9]+]], $[[R0]], $[[R1]]
  
-; ALL: ll ${{[0-9]+}}, 0($[[R0]])
+; ALL: ll ${{[0-9]+}}, 0($[[R2]])
  
-define {i16, i1} @foo(i16* %addr, i16 signext %r, i16 zeroext %new) {
-  %res = cmpxchg i16* %addr, i16 %r, i16 %new seq_cst seq_cst
+define {i16, i1} @foo(i16** %addr, i16 signext %r, i16 zeroext %new) {
+  %ptr = load i16*, i16** %addr
+  %res = cmpxchg i16* %ptr, i16 %r, i16 %new seq_cst seq_cst
    ret {i16, i1} %res
  }
  
diff --git a/test/CodeGen/Mips/no-store-in-atomic-rmw.ll b/test/CodeGen/Mips/no-store-in-atomic-rmw.ll

new file mode 100644 (file)

index 0000000..fc6b278
--- /dev/null
+++ b/test/CodeGen/Mips/no-store-in-atomic-rmw.ll
@@ -0,0 +1,156 @@
+; RUN: llc -O0 -march=mips64 -mcpu=mips64r2 < %s | FileCheck %s
+
+; Check that no stores occur between ll and sc when the fast register allocator
+; is used. Atomic read-modify-write sequences on certain MIPS implementations
+; will fail if a store occurs between a ll and sc.
+
+define i32 @main() {
+; CHECK-LABEL: main:
+entry:
+  %retval = alloca i32, align 4
+  %I = alloca i32, align 4
+  %k = alloca i32, align 4
+  %i = alloca i32*, align 8
+  %ret = alloca i32, align 4
+  %flag_k = alloca i8, align 1
+  %.atomictmp = alloca i32, align 4
+  %atomic-temp = alloca i32, align 4
+  %.atomictmp1 = alloca i32, align 4
+  %atomic-temp2 = alloca i32, align 4
+  %.atomictmp3 = alloca i32, align 4
+  %atomic-temp4 = alloca i32, align 4
+  %.atomictmp5 = alloca i32, align 4
+  %atomic-temp6 = alloca i32, align 4
+  %.atomictmp7 = alloca i32, align 4
+  %atomic-temp8 = alloca i32, align 4
+  %.atomictmp9 = alloca i32, align 4
+  %atomic-temp10 = alloca i32, align 4
+  %.atomictmp11 = alloca i32, align 4
+  %atomic-temp12 = alloca i32, align 4
+  %.atomictmp13 = alloca i32, align 4
+  %cmpxchg.bool = alloca i8, align 1
+  %cmpxchg.bool14 = alloca i8, align 1
+  store i32 0, i32* %retval, align 4
+  store i32 0, i32* %I, align 4
+  store i32 5, i32* %k, align 4
+  store i32* %I, i32** %i, align 8
+  store i32 0, i32* %ret, align 4
+  store i8 0, i8* %flag_k, align 1
+  %0 = load i32*, i32** %i, align 8
+  %1 = load i32, i32* %k, align 4
+  %2 = atomicrmw xchg i32* %0, i32 %1 monotonic
+; CHECK-LABEL: .LBB0_1:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: sc
+  store i32 %2, i32* %ret, align 4
+  %3 = load i32*, i32** %i, align 8
+  store i32 3, i32* %.atomictmp, align 4
+  %4 = load i32, i32* %.atomictmp, align 4
+  %5 = atomicrmw add i32* %3, i32 %4 monotonic
+; CHECK-LABEL: .LBB0_3:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: addu
+; CHECK: sc
+  store i32 %5, i32* %atomic-temp, align 4
+  %6 = load i32, i32* %atomic-temp, align 4
+  %7 = load i32*, i32** %i, align 8
+  store i32 3, i32* %.atomictmp1, align 4
+  %8 = load i32, i32* %.atomictmp1, align 4
+  %9 = atomicrmw sub i32* %7, i32 %8 monotonic
+; CHECK-LABEL: .LBB0_5:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: subu
+; CHECK: sc
+  store i32 %9, i32* %atomic-temp2, align 4
+  %10 = load i32, i32* %atomic-temp2, align 4
+  %11 = load i32*, i32** %i, align 8
+  store i32 3, i32* %.atomictmp3, align 4
+  %12 = load i32, i32* %.atomictmp3, align 4
+  %13 = atomicrmw and i32* %11, i32 %12 monotonic
+; CHECK-LABEL: .LBB0_7:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: and
+; CHECK: sc
+  store i32 %13, i32* %atomic-temp4, align 4
+  %14 = load i32, i32* %atomic-temp4, align 4
+  %15 = load i32*, i32** %i, align 8
+  store i32 3, i32* %.atomictmp5, align 4
+  %16 = load i32, i32* %.atomictmp5, align 4
+  %17 = atomicrmw or i32* %15, i32 %16 monotonic
+; CHECK-LABEL: .LBB0_9:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: or
+; CHECK: sc
+  %18 = load i32*, i32** %i, align 8
+  store i32 5, i32* %.atomictmp13, align 4
+  %19 = load i32, i32* %I, align 4
+  %20 = load i32, i32* %.atomictmp13, align 4
+  %21 = cmpxchg weak i32* %18, i32 %19, i32 %20 monotonic monotonic
+; CHECK-LABEL: .LBB0_11:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: sc
+  %22 = extractvalue { i32, i1 } %21, 0
+  %23 = extractvalue { i32, i1 } %21, 1
+  br i1 %23, label %cmpxchg.continue, label %cmpxchg.store_expected
+
+cmpxchg.store_expected:                           ; preds = %entry
+  store i32 %22, i32* %I, align 4
+  br label %cmpxchg.continue
+
+cmpxchg.continue:                                 ; preds = %cmpxchg.store_expected, %entry
+  %frombool = zext i1 %23 to i8
+  store i8 %frombool, i8* %cmpxchg.bool, align 1
+  %24 = load i8, i8* %cmpxchg.bool, align 1
+  %tobool = trunc i8 %24 to i1
+  %25 = load i32*, i32** %i, align 8
+  %26 = load i32, i32* %I, align 4
+  %27 = load i32, i32* %ret, align 4
+  %28 = cmpxchg i32* %25, i32 %26, i32 %27 monotonic monotonic
+; CHECK-LABEL: .LBB0_17:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: sc
+  %29 = extractvalue { i32, i1 } %28, 0
+  %30 = extractvalue { i32, i1 } %28, 1
+  br i1 %30, label %cmpxchg.continue16, label %cmpxchg.store_expected15
+
+cmpxchg.store_expected15:                         ; preds = %cmpxchg.continue
+  store i32 %29, i32* %I, align 4
+  br label %cmpxchg.continue16
+
+cmpxchg.continue16:                               ; preds = %cmpxchg.store_expected15, %cmpxchg.continue
+  %frombool17 = zext i1 %30 to i8
+  store i8 %frombool17, i8* %cmpxchg.bool14, align 1
+  %31 = load i8, i8* %cmpxchg.bool14, align 1
+  %tobool18 = trunc i8 %31 to i1
+  %32 = atomicrmw xchg i8* %flag_k, i8 1 monotonic
+; CHECK-LABEL: .LBB0_23:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: sc
+  %tobool19 = icmp ne i8 %32, 0
+  %33 = atomicrmw xchg i8* %flag_k, i8 1 monotonic
+; CHECK-LABEL: .LBB0_26:
+; CHECK: ll
+; CHECK-NOT: sd
+; CHECK-NOT: sw
+; CHECK: sc
+  %tobool20 = icmp ne i8 %33, 0
+  store atomic i8 0, i8* %flag_k monotonic, align 1
+  %34 = load i32, i32* %retval, align 4
+  ret i32 %34
+}
author	Simon Dardis <simon.dardis@imgtec.com>
	Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)
committer	Simon Dardis <simon.dardis@imgtec.com>
	Fri, 24 Feb 2017 16:32:18 +0000 (16:32 +0000)
lib/Target/Mips/CMakeLists.txt		patch \| blob \| history
lib/Target/Mips/Mips.h		patch \| blob \| history
lib/Target/Mips/Mips64InstrInfo.td		patch \| blob \| history
lib/Target/Mips/MipsExpandPseudo.cpp	[new file with mode: 0644]	patch \| blob
lib/Target/Mips/MipsISelLowering.cpp		patch \| blob \| history
lib/Target/Mips/MipsInstrInfo.td		patch \| blob \| history
lib/Target/Mips/MipsTargetMachine.cpp		patch \| blob \| history
test/CodeGen/Mips/atomicCmpSwapPW.ll		patch \| blob \| history
test/CodeGen/Mips/no-store-in-atomic-rmw.ll	[new file with mode: 0644]	patch \| blob