From: Sanjay Patel
Date: Thu, 4 Apr 2019 14:46:13 +0000 (+0000)
Subject: [x86] eliminate unnecessary broadcast of horizontal op
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22283247bc9793cfd2db3ae0876c4ed4bc5663b1;p=llvm

[x86] eliminate unnecessary broadcast of horizontal op

This is another pattern that comes up if we more aggressively scalarize FP ops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357703 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b17d7276e0b..e0c3aaac723 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32790,10 +32790,19 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 /// Eliminate a redundant shuffle of a horizontal math op.
 static SDValue foldShuffleOfHorizOp(SDNode *N) {
   unsigned Opcode = N->getOpcode();
-  if (Opcode != X86ISD::MOVDDUP)
+  if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
     if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
       return SDValue();
 
+  // For a broadcast, peek through an extract element of index 0 to find the
+  // horizontal op: broadcast (ext_vec_elt HOp, 0)
+  if (Opcode == X86ISD::VBROADCAST) {
+    SDValue SrcOp = N->getOperand(0);
+    if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+      N = SrcOp.getNode();
+  }
+
   SDValue HOp = N->getOperand(0);
   if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
       HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -32808,10 +32817,11 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
     return SDValue();
 
   // When the operands of a horizontal math op are identical, the low half of
-  // the result is the same as the high half. If the shuffle is also replicating
-  // low and high halves, we don't need the shuffle.
-  if (Opcode == X86ISD::MOVDDUP) {
+  // the result is the same as the high half. If a target shuffle is also
+  // replicating low and high halves, we don't need the shuffle.
+  if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
     // movddup (hadd X, X) --> hadd X, X
+    // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
     assert((HOp.getValueType() == MVT::v2f64 ||
             HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
     return HOp;
diff --git a/test/CodeGen/X86/haddsub-shuf.ll b/test/CodeGen/X86/haddsub-shuf.ll
index c37e9862ed7..b93555a091b 100644
--- a/test/CodeGen/X86/haddsub-shuf.ll
+++ b/test/CodeGen/X86/haddsub-shuf.ll
@@ -349,7 +349,6 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) {
 ; AVX2_FAST-LABEL: hadd_v2f64_scalar_splat:
 ; AVX2_FAST:       # %bb.0:
 ; AVX2_FAST-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2_FAST-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX2_FAST-NEXT:    retq
   %a0 = extractelement <2 x double> %a, i32 0
   %a1 = extractelement <2 x double> %a, i32 1
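
Why the fold is sound: HADDPD writes a[0]+a[1] into the low lane of the result and b[0]+b[1] into the high lane, so when both source operands are the same register, the two lanes already hold the same sum and splatting lane 0 changes nothing. Below is a minimal standalone C++ sketch of that equivalence; it is not code from this patch, and haddpd() is a hypothetical scalar model of the instruction's semantics.

  #include <array>
  #include <cassert>

  using v2f64 = std::array<double, 2>;

  // Scalar model of x86 HADDPD: result[0] = a[0] + a[1], result[1] = b[0] + b[1].
  static v2f64 haddpd(const v2f64 &a, const v2f64 &b) {
    return {a[0] + a[1], b[0] + b[1]};
  }

  int main() {
    v2f64 x = {1.5, 2.5};
    v2f64 h = haddpd(x, x);      // hadd X, X --> {4.0, 4.0}
    v2f64 splat = {h[0], h[0]};  // broadcast (extract_vec_elt h, 0)
    assert(splat == h);          // the broadcast is redundant
    return 0;
  }

This is exactly the situation in the hadd_v2f64_scalar_splat test above: once the operands of the horizontal op are known to be identical, the trailing vmovddup (or vbroadcastsd) can be dropped.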