From c6c8ba6804d5b60e27360d7e0cac05f41e5a8f2e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 26 Aug 2019 22:17:54 +0000 Subject: [PATCH] [X86] Delay combineIncDecVector until after op legalization. Probably better to keep add over sub in early DAG combines. It might make sense to push this to lowering or delay it all the way to isel. But this was the simplest change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++----- test/CodeGen/X86/i128-add.ll | 8 ++++---- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9f6a0b76e06..72ef6bc6cf8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -43712,10 +43712,18 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, /// The all-ones vector constant can be materialized using a pcmpeq instruction /// that is commonly recognized as an idiom (has no register dependency), so /// that's better/smaller than loading a splat 1 constant. -static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG) { +static SDValue combineIncDecVector(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && "Unexpected opcode for increment/decrement transform"); + // Delay this until legalize ops to avoid interfering with early DAG combines + // that may expect canonical adds. + // FIXME: We may want to consider moving this to custom lowering or all the + // way to isel, but let's start here. + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + // Pseudo-legality check: getOnesVector() expects one of these types, so bail // out and wait for legalization if we have an unsupported vector length. 
EVT VT = N->getValueType(0); @@ -43962,6 +43970,7 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1, } static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { const SDNodeFlags Flags = N->getFlags(); if (Flags.hasVectorReduction()) { @@ -43992,7 +44001,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, HADDBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) + if (SDValue V = combineIncDecVector(N, DAG, DCI)) return V; return combineAddOrSubToADCOrSBB(N, DAG); @@ -44086,6 +44095,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG, } static SDValue combineSub(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); @@ -44122,7 +44132,7 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG, HSUBBuilder); } - if (SDValue V = combineIncDecVector(N, DAG)) + if (SDValue V = combineIncDecVector(N, DAG, DCI)) return V; // Try to create PSUBUS if SUB's argument is max/min @@ -44761,8 +44771,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); case X86ISD::CMP: return combineCMP(N, DAG); - case ISD::ADD: return combineAdd(N, DAG, Subtarget); - case ISD::SUB: return combineSub(N, DAG, Subtarget); + case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget); + case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget); case X86ISD::ADD: case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); case X86ISD::SBB: return combineSBB(N, DAG); diff --git a/test/CodeGen/X86/i128-add.ll b/test/CodeGen/X86/i128-add.ll index 182ada4c458..8e75c4a575e 100644 --- a/test/CodeGen/X86/i128-add.ll +++ b/test/CodeGen/X86/i128-add.ll @@ -57,10 +57,10 @@ define <1 x i128> @add_v1i128(<1 x i128> %x, <1 x i128> %y) nounwind { ; 
X86-NEXT: adcl {{[0-9]+}}(%esp), %edi ; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: adcl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: subl $-1, %esi -; X86-NEXT: sbbl $-1, %edi -; X86-NEXT: sbbl $-1, %edx -; X86-NEXT: sbbl $-1, %ecx +; X86-NEXT: addl $1, %esi +; X86-NEXT: adcl $0, %edi +; X86-NEXT: adcl $0, %edx +; X86-NEXT: adcl $0, %ecx ; X86-NEXT: movl %esi, (%eax) ; X86-NEXT: movl %edi, 4(%eax) ; X86-NEXT: movl %edx, 8(%eax) -- 2.40.0