From 0ecb8857e93beea2b2853241dda8dac1f0219b57 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 19 Oct 2017 15:02:24 +0000 Subject: [PATCH] [X86] Replace custom scalar integer absolute matching with ISD::ABS lowering. x86 has its own copy of integer absolute pattern matching to combine directly to a SUB+CMOV. This patch removes the x86 combine and adds custom lowering support for ISD::ABS instead, allowing us to use the DAGCombiner version. Additional test cases are already covered by iabs.ll (rL315706 and rL315711). Differential Revision: https://reviews.llvm.org/D38895 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@316162 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++------------------- test/CodeGen/X86/combine-abs.ll | 7 ++-- 2 files changed, 23 insertions(+), 41 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index a8900b04f06..8e06b481217 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -188,6 +188,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); setCondCodeAction(ISD::SETUNE, MVT::f80, Expand); + // Integer absolute. + if (Subtarget.hasCMov()) { + setOperationAction(ISD::ABS , MVT::i16 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS , MVT::i64 , Custom); + } + // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this // operation. setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote); @@ -21502,6 +21510,19 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { + // Since X86 does not have CMOV for 8-bit integer, we don't convert + // 8-bit integer abs to NEG and CMOV. 
+ SDLoc DL(Op); + SDValue N0 = Op.getOperand(0); + SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, DL, VT), N0); + SDValue Ops[] = {N0, Neg, DAG.getConstant(X86::COND_GE, DL, MVT::i8), + SDValue(Neg.getNode(), 1)}; + return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); + } + assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); @@ -32823,38 +32844,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// Generate NEG and CMOV for integer abs. -static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) { - EVT VT = N->getValueType(0); - - // Since X86 does not have CMOV for 8-bit integer, we don't convert - // 8-bit integer abs to NEG and CMOV. - if (VT.isInteger() && VT.getSizeInBits() == 8) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDLoc DL(N); - - // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1) - // and change it to SUB and CMOV. - if (VT.isInteger() && N->getOpcode() == ISD::XOR && - N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) { - auto *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); - if (Y1C && Y1C->getAPIntValue() == VT.getSizeInBits() - 1) { - // Generate SUB & CMOV. 
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), - DAG.getConstant(0, DL, VT), N0.getOperand(0)); - SDValue Ops[] = {N0.getOperand(0), Neg, - DAG.getConstant(X86::COND_GE, DL, MVT::i8), - SDValue(Neg.getNode(), 1)}; - return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); - } - } - return SDValue(); -} - /// Try to turn tests against the signbit in the form of: /// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) /// into: @@ -34436,10 +34425,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) return RV; - if (Subtarget.hasCMov()) - if (SDValue RV = combineIntegerAbs(N, DAG)) - return RV; - if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) return FPLogic; diff --git a/test/CodeGen/X86/combine-abs.ll b/test/CodeGen/X86/combine-abs.ll index fa476a2a5da..a53a13ac00a 100644 --- a/test/CodeGen/X86/combine-abs.ll +++ b/test/CodeGen/X86/combine-abs.ll @@ -26,12 +26,9 @@ define <16 x i16> @combine_v16i16_abs_constant() { define i32 @combine_i32_abs_abs(i32 %a) { ; CHECK-LABEL: combine_i32_abs_abs: ; CHECK: # BB#0: -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: negl %ecx -; CHECK-NEXT: cmovll %edi, %ecx -; CHECK-NEXT: movl %ecx, %eax +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: negl %eax -; CHECK-NEXT: cmovll %ecx, %eax +; CHECK-NEXT: cmovll %edi, %eax ; CHECK-NEXT: retq %n1 = sub i32 zeroinitializer, %a %b1 = icmp slt i32 %a, zeroinitializer -- 2.40.0