From: Sanjay Patel
Date: Thu, 4 Apr 2019 14:46:13 +0000 (+0000)
Subject: [x86] eliminate unnecessary broadcast of horizontal op
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22283247bc9793cfd2db3ae0876c4ed4bc5663b1;p=llvm

[x86] eliminate unnecessary broadcast of horizontal op

This is another pattern that comes up if we more aggressively scalarize FP ops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357703 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b17d7276e0b..e0c3aaac723 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -32790,10 +32790,19 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
 /// Eliminate a redundant shuffle of a horizontal math op.
 static SDValue foldShuffleOfHorizOp(SDNode *N) {
   unsigned Opcode = N->getOpcode();
-  if (Opcode != X86ISD::MOVDDUP)
+  if (Opcode != X86ISD::MOVDDUP && Opcode != X86ISD::VBROADCAST)
     if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef())
       return SDValue();
 
+  // For a broadcast, peek through an extract element of index 0 to find the
+  // horizontal op: broadcast (ext_vec_elt HOp, 0)
+  if (Opcode == X86ISD::VBROADCAST) {
+    SDValue SrcOp = N->getOperand(0);
+    if (SrcOp.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        SrcOp.getValueType() == MVT::f64 && isNullConstant(SrcOp.getOperand(1)))
+      N = SrcOp.getNode();
+  }
+
   SDValue HOp = N->getOperand(0);
   if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD &&
       HOp.getOpcode() != X86ISD::HSUB && HOp.getOpcode() != X86ISD::FHSUB)
@@ -32808,10 +32817,11 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) {
     return SDValue();
 
   // When the operands of a horizontal math op are identical, the low half of
-  // the result is the same as the high half. If the shuffle is also replicating
-  // low and high halves, we don't need the shuffle.
-  if (Opcode == X86ISD::MOVDDUP) {
+  // the result is the same as the high half. If a target shuffle is also
+  // replicating low and high halves, we don't need the shuffle.
+  if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
     // movddup (hadd X, X) --> hadd X, X
+    // broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
     assert((HOp.getValueType() == MVT::v2f64 ||
             HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op");
     return HOp;
diff --git a/test/CodeGen/X86/haddsub-shuf.ll b/test/CodeGen/X86/haddsub-shuf.ll
index c37e9862ed7..b93555a091b 100644
--- a/test/CodeGen/X86/haddsub-shuf.ll
+++ b/test/CodeGen/X86/haddsub-shuf.ll
@@ -349,7 +349,6 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) {
 ; AVX2_FAST-LABEL: hadd_v2f64_scalar_splat:
 ; AVX2_FAST:       # %bb.0:
 ; AVX2_FAST-NEXT:    vhaddpd %xmm0, %xmm0, %xmm0
-; AVX2_FAST-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
 ; AVX2_FAST-NEXT:    retq
   %a0 = extractelement <2 x double> %a, i32 0
   %a1 = extractelement <2 x double> %a, i32 1
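
Why the fold is sound: HADDPD writes a[0]+a[1] into the low lane of the result and b[0]+b[1] into the high lane, so when both source operands are the same register, the two lanes already hold the same sum and splatting lane 0 changes nothing. Below is a minimal standalone C++ sketch of that equivalence; it is not code from this patch, and haddpd() is a hypothetical scalar model of the instruction's semantics.

  #include <array>
  #include <cassert>

  using v2f64 = std::array<double, 2>;

  // Scalar model of x86 HADDPD: result[0] = a[0] + a[1], result[1] = b[0] + b[1].
  static v2f64 haddpd(const v2f64 &a, const v2f64 &b) {
    return {a[0] + a[1], b[0] + b[1]};
  }

  int main() {
    v2f64 x = {1.5, 2.5};
    v2f64 h = haddpd(x, x);      // hadd X, X --> {4.0, 4.0}
    v2f64 splat = {h[0], h[0]};  // broadcast (extract_vec_elt h, 0)
    assert(splat == h);          // the broadcast is redundant
    return 0;
  }

This is exactly the situation in the hadd_v2f64_scalar_splat test above: once the operands of the horizontal op are known to be identical, the trailing vmovddup (or vbroadcastsd) can be dropped.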