#include "AArch64.h"
#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
continue;
}
+ if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
+ // XZ/WZ for LSE can only be used when acquire semantics are not used,
+ // LDOPAL WZ is an invalid opcode.
+ switch (MI.getOpcode()) {
+ case AArch64::CASALb:
+ case AArch64::CASALh:
+ case AArch64::CASALs:
+ case AArch64::CASALd:
+ case AArch64::SWPALb:
+ case AArch64::SWPALh:
+ case AArch64::SWPALs:
+ case AArch64::SWPALd:
+ case AArch64::LDADDALb:
+ case AArch64::LDADDALh:
+ case AArch64::LDADDALs:
+ case AArch64::LDADDALd:
+ case AArch64::LDEORALb:
+ case AArch64::LDEORALh:
+ case AArch64::LDEORALs:
+ case AArch64::LDEORALd:
+ case AArch64::LDSETALb:
+ case AArch64::LDSETALh:
+ case AArch64::LDSETALs:
+ case AArch64::LDSETALd:
+ case AArch64::LDSMINALb:
+ case AArch64::LDSMINALh:
+ case AArch64::LDSMINALs:
+ case AArch64::LDSMINALd:
+ case AArch64::LDSMAXALb:
+ case AArch64::LDSMAXALh:
+ case AArch64::LDSMAXALs:
+ case AArch64::LDSMAXALd:
+ case AArch64::LDUMINALb:
+ case AArch64::LDUMINALh:
+ case AArch64::LDUMINALs:
+ case AArch64::LDUMINALd:
+ case AArch64::LDUMAXALb:
+ case AArch64::LDUMAXALh:
+ case AArch64::LDUMAXALs:
+ case AArch64::LDUMAXALd:
+ continue;
+ default:
+ break;
+ }
+ }
const MCInstrDesc &Desc = MI.getDesc();
for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
- void SelectCMP_SWAP(SDNode *N);
+ bool SelectCMP_SWAP(SDNode *N);
};
} // end anonymous namespace
}
/// We've got special pseudo-instructions for these
-void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+
+ // Leave IR for LSE if subtarget supports it.
+ if (Subtarget->hasLSE()) return false;
+
if (MemTy == MVT::i8)
Opcode = AArch64::CMP_SWAP_8;
else if (MemTy == MVT::i16)
ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
CurDAG->RemoveDeadNode(N);
+
+ return true;
}
void AArch64DAGToDAGISel::Select(SDNode *Node) {
break;
case ISD::ATOMIC_CMP_SWAP:
- SelectCMP_SWAP(Node);
- return;
+ if (SelectCMP_SWAP(Node))
+ return;
+ break;
case ISD::READ_REGISTER:
if (tryReadRegister(Node))
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+ if (Size > 128) return AtomicExpansionKind::None;
+ // Nand not supported in LSE.
+ if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
+ // Currently leaving And and Sub to LLSC
+ if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub))
+ return AtomicExpansionKind::LLSC;
+ // Leave 128 bits to LLSC.
+ return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
}
bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
+ // If subtarget has LSE, leave cmpxchg intact for codegen.
+ if (Subtarget->hasLSE()) return false;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
GPR64:$newLo, GPR64:$newHi), []>,
Sched<[WriteAtomic]>;
+
+// v8.1 Atomic instructions:
+def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;