return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
-static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG) {
+static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ bool AllowIncDec = true) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD_ADD:
    NewOpc = X86ISD::LADD;
    break;
  case ISD::ATOMIC_LOAD_SUB:
    NewOpc = X86ISD::LSUB;
    break;
  }
MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();
+
+ if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
+ // Convert to inc/dec if they aren't slow or we are optimizing for size.
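+    // E.g. an atomic add of +1 (or sub of -1) becomes LINC, and an atomic
+    // sub of +1 (or add of -1) becomes LDEC.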
+ if (AllowIncDec && (!Subtarget.slowIncDec() ||
+ DAG.getMachineFunction().getFunction()->optForSize())) {
+ if ((NewOpc == X86ISD::LADD && C->isOne()) ||
+ (NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
+ return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {N->getOperand(0), N->getOperand(1)},
+ /*MemVT=*/N->getSimpleValueType(0), MMO);
+ if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
+ (NewOpc == X86ISD::LADD && C->isAllOnesValue()))
+ return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {N->getOperand(0), N->getOperand(1)},
+ /*MemVT=*/N->getSimpleValueType(0), MMO);
+ }
+ }
+
  return DAG.getMemIntrinsicNode(
      NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
      {N->getOperand(0), N->getOperand(1), N->getOperand(2)},
      /*MemVT=*/N->getSimpleValueType(0), MMO);
}
return N;
}
- SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG);
+ SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
// RAUW the chain, but don't worry about the result, as it's unused.
assert(!N->hasAnyUseOfValue(0));
DAG.ReplaceAllUsesOfValueWith(N.getValue(1), LockOp.getValue(1));
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
+ case X86ISD::LINC: return "X86ISD::LINC";
+ case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VZEXT: return "X86ISD::VZEXT";
/// Combine:
///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
  auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
  auto AtomicSub = DAG.getAtomic(
      ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpLHS.getValueType(),
      /*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
- auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG);
+  // If the comparison uses the CF flag we can't use INC/DEC instructions.
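+  // INC and DEC leave CF unchanged, so a CC that reads CF (the unsigned
+  // above/below conditions) must keep the LOCK ADD/SUB form.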
+ bool NeedCF = false;
+ switch (CC) {
+ default: break;
+ case X86::COND_A: case X86::COND_AE:
+ case X86::COND_B: case X86::COND_BE:
+ NeedCF = true;
+ break;
+ }
+ auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
else
return SDValue();
- SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG);
+ SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
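+  // COND_B tests the carry flag, so first try to reuse the carry produced
+  // by an ADD via combineCarryThroughADD.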
if (CC == X86::COND_B)
if (SDValue Flags = combineCarryThroughADD(EFLAGS))
return Flags;
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
- return combineSetCCAtomicArith(EFLAGS, CC, DAG);
+ return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget);
}
/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
// Try to simplify the EFLAGS and condition code operands.
  // We can't always do this as FCMOV only supports a subset of the X86
  // condition codes.
- if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
+ if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
Flags};
SDValue EFLAGS = N->getOperand(1);
// Try to simplify the EFLAGS and condition code operands.
- if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget))
return getSETCC(CC, Flags, DL, DAG);
return SDValue();
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
- if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
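+  /// EFLAGS, OUTCHAIN = LINC(INCHAIN, PTR) for the unary LOCK INC/DEC
+  /// forms, which take no RHS operand.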
- LADD, LSUB, LOR, LXOR, LAND,
+ LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
- SDNode Op, string mnemonic> {
+ string frag, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
!strconcat(mnemonic, "{b}\t$dst"),
- [(set EFLAGS, (Op addr:$dst, (i8 1)))],
+ [(set EFLAGS, (!cast<PatFrag>(frag # "_8") addr:$dst))],
IIC_UNARY_MEM>, LOCK;
def NAME#16m : I<Opc, Form, (outs), (ins i16mem:$dst),
!strconcat(mnemonic, "{w}\t$dst"),
- [(set EFLAGS, (Op addr:$dst, (i16 1)))],
+ [(set EFLAGS, (!cast<PatFrag>(frag # "_16") addr:$dst))],
IIC_UNARY_MEM>, OpSize16, LOCK;
def NAME#32m : I<Opc, Form, (outs), (ins i32mem:$dst),
!strconcat(mnemonic, "{l}\t$dst"),
- [(set EFLAGS, (Op addr:$dst, (i32 1)))],
+ [(set EFLAGS, (!cast<PatFrag>(frag # "_32") addr:$dst))],
IIC_UNARY_MEM>, OpSize32, LOCK;
def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
!strconcat(mnemonic, "{q}\t$dst"),
- [(set EFLAGS, (Op addr:$dst, (i64 1)))],
+ [(set EFLAGS, (!cast<PatFrag>(frag # "_64") addr:$dst))],
IIC_UNARY_MEM>, LOCK;
}
}
-let Predicates = [UseIncDec] in {
-defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, X86lock_add, "inc">;
-defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, X86lock_sub, "dec">;
+multiclass unary_atomic_intrin<SDNode atomic_op> {
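+  // All four widths share the same node type (the value result is EFLAGS,
+  // i32), so distinguish them by the memory VT of the MemIntrinsicSDNode.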
+ def _8 : PatFrag<(ops node:$ptr),
+ (atomic_op node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+ }]>;
+ def _16 : PatFrag<(ops node:$ptr),
+ (atomic_op node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+ }]>;
+ def _32 : PatFrag<(ops node:$ptr),
+ (atomic_op node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+ }]>;
+ def _64 : PatFrag<(ops node:$ptr),
+ (atomic_op node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+ }]>;
}
+defm X86lock_inc : unary_atomic_intrin<X86lock_inc>;
+defm X86lock_dec : unary_atomic_intrin<X86lock_dec>;
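+// Each defm above expands to PatFrags named X86lock_inc_8/_16/_32/_64 (and
+// the X86lock_dec equivalents); LOCK_ArithUnOp recovers them by name with
+// !cast<PatFrag>(frag # "_8") and friends.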
+
+defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, "X86lock_inc", "inc">;
+defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, "X86lock_dec", "dec">;
+
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
SDTCisPtrTy<1>,
SDTCisInt<2>]>;
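+// The unary LOCK ops (INC/DEC) produce only EFLAGS and take just a pointer.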
+def SDTLockUnaryArithWithFlags : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisPtrTy<1>]>;
+
def SDTX86Ret : SDTypeProfile<0, -1, [SDTCisVT<0, i32>]>;
def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad,
SDNPMemOperand]>;
+def X86lock_inc : SDNode<"X86ISD::LINC", SDTLockUnaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags,
+ [SDNPHasChain, SDNPMayStore, SDNPMayLoad,
+ SDNPMemOperand]>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
}
define i32 @test_sub_1_cmov_sle(i64* %p, i32 %a0, i32 %a1) #0 {
-; CHECK-LABEL: test_sub_1_cmov_sle:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock addq $-1, (%rdi)
-; CHECK-NEXT: cmovgel %edx, %esi
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: retq
+; FASTINCDEC-LABEL: test_sub_1_cmov_sle:
+; FASTINCDEC: # BB#0: # %entry
+; FASTINCDEC-NEXT: lock decq (%rdi)
+; FASTINCDEC-NEXT: cmovgel %edx, %esi
+; FASTINCDEC-NEXT: movl %esi, %eax
+; FASTINCDEC-NEXT: retq
+;
+; SLOWINCDEC-LABEL: test_sub_1_cmov_sle:
+; SLOWINCDEC: # BB#0: # %entry
+; SLOWINCDEC-NEXT: lock addq $-1, (%rdi)
+; SLOWINCDEC-NEXT: cmovgel %edx, %esi
+; SLOWINCDEC-NEXT: movl %esi, %eax
+; SLOWINCDEC-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sle i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}
define i32 @test_sub_1_cmov_sgt(i64* %p, i32 %a0, i32 %a1) #0 {
-; CHECK-LABEL: test_sub_1_cmov_sgt:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock addq $-1, (%rdi)
-; CHECK-NEXT: cmovll %edx, %esi
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: retq
+; FASTINCDEC-LABEL: test_sub_1_cmov_sgt:
+; FASTINCDEC: # BB#0: # %entry
+; FASTINCDEC-NEXT: lock decq (%rdi)
+; FASTINCDEC-NEXT: cmovll %edx, %esi
+; FASTINCDEC-NEXT: movl %esi, %eax
+; FASTINCDEC-NEXT: retq
+;
+; SLOWINCDEC-LABEL: test_sub_1_cmov_sgt:
+; SLOWINCDEC: # BB#0: # %entry
+; SLOWINCDEC-NEXT: lock addq $-1, (%rdi)
+; SLOWINCDEC-NEXT: cmovll %edx, %esi
+; SLOWINCDEC-NEXT: movl %esi, %eax
+; SLOWINCDEC-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = select i1 %tmp1, i32 %a0, i32 %a1
  ret i32 %tmp2
}
define i8 @test_sub_1_setcc_sgt(i64* %p) #0 {
-; CHECK-LABEL: test_sub_1_setcc_sgt:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: lock addq $-1, (%rdi)
-; CHECK-NEXT: setge %al
-; CHECK-NEXT: retq
+; FASTINCDEC-LABEL: test_sub_1_setcc_sgt:
+; FASTINCDEC: # BB#0: # %entry
+; FASTINCDEC-NEXT: lock decq (%rdi)
+; FASTINCDEC-NEXT: setge %al
+; FASTINCDEC-NEXT: retq
+;
+; SLOWINCDEC-LABEL: test_sub_1_setcc_sgt:
+; SLOWINCDEC: # BB#0: # %entry
+; SLOWINCDEC-NEXT: lock addq $-1, (%rdi)
+; SLOWINCDEC-NEXT: setge %al
+; SLOWINCDEC-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp sgt i64 %tmp0, 0
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}
define i8 @test_sub_1_cmp_1_setcc_ugt(i64* %p) #0 {
-; FASTINCDEC-LABEL: test_sub_1_cmp_1_setcc_ugt:
-; FASTINCDEC: # BB#0: # %entry
-; FASTINCDEC-NEXT: lock decq (%rdi)
-; FASTINCDEC-NEXT: seta %al
-; FASTINCDEC-NEXT: retq
-;
-; SLOWINCDEC-LABEL: test_sub_1_cmp_1_setcc_ugt:
-; SLOWINCDEC: # BB#0: # %entry
-; SLOWINCDEC-NEXT: lock subq $1, (%rdi)
-; SLOWINCDEC-NEXT: seta %al
-; SLOWINCDEC-NEXT: retq
+; CHECK-LABEL: test_sub_1_cmp_1_setcc_ugt:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: lock subq $1, (%rdi)
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
entry:
%tmp0 = atomicrmw sub i64* %p, i64 1 seq_cst
  %tmp1 = icmp ugt i64 %tmp0, 1
  %tmp2 = zext i1 %tmp1 to i8
  ret i8 %tmp2
}