From f726aa1cb6c79081a54091c31cb0f9b0e8557b50 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sun, 25 Aug 2019 17:59:49 +0000
Subject: [PATCH] [X86][DAGCombiner] Teach narrowShuffle to use concat_vectors
 instead of inserting into undef

Summary:
Concat_vectors is more canonical during early DAG combine. For example, its what's used by SelectionDAGBuilder when converting IR shuffles into SelectionDAG shuffles when element counts between inputs and mask don't match. We also have combines in DAGCombiner than can pull concat_vectors through a shuffle. See partitionShuffleOfConcats. So it seems like concat_vectors is a better operation to use here. I had to teach DAGCombiner's SimplifyVBinOp to also handle concat_vectors with undef. I haven't checked yet if we can remove the INSERT_SUBVECTOR version in there or not.

I didn't want to mess with the other caller of getShuffleHalfVectors that's used during shuffle lowering where insert_subvector probably is what we want to produce so I've enabled this via a boolean passed to the function.

Reviewers: spatel, RKSimon

Reviewed By: RKSimon

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D66504

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@369872 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 31 ++++++++++++++++++++++++
 lib/Target/X86/X86ISelLowering.cpp       | 12 +++++++--
 2 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 75fb6559b9b..b8af674a1e2 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19537,6 +19537,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
     }
   }
 
+  // Make sure all but the first op are undef.
+  auto ConcatWithUndef = [](SDValue Concat) {
+    return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+           std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+                       [](const SDValue &Op) {
+                         return Op.isUndef();
+                       });
+  };
+
+  // The following pattern is likely to emerge with vector reduction ops. Moving
+  // the binary operation ahead of the concat may allow using a narrower vector
+  // instruction that has better performance than the wide version of the op:
+  // VBinOp (concat X, undef), (concat Y, undef) --> concat (VBinOp X, Y), VecC
+  if (ConcatWithUndef(LHS) && ConcatWithUndef(RHS) &&
+      (LHS.hasOneUse() || RHS.hasOneUse())) {
+    SDValue X = LHS.getOperand(0);
+    SDValue Y = RHS.getOperand(0);
+    EVT NarrowVT = X.getValueType();
+    if (NarrowVT == Y.getValueType() &&
+        TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+      // (binop undef, undef) may not return undef, so compute that result.
+      SDLoc DL(N);
+      SDValue VecC =
+          DAG.getNode(Opcode, DL, NarrowVT, DAG.getUNDEF(NarrowVT),
+                      DAG.getUNDEF(NarrowVT));
+      SmallVector<SDValue, 4> Ops(LHS.getNumOperands(), VecC);
+      Ops[0] = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
+    }
+  }
+
   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
     return V;
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 05218a3d6d2..f654f1d7a72 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -15013,7 +15013,7 @@ getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
 static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
                                      ArrayRef<int> HalfMask, int HalfIdx1,
                                      int HalfIdx2, bool UndefLower,
-                                     SelectionDAG &DAG) {
+                                     SelectionDAG &DAG, bool UseConcat = false) {
   assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?");
   assert(V1.getValueType().isSimple() && "Expecting only simple types");
 
@@ -15034,6 +15034,14 @@ static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2,
   SDValue Half1 = getHalfVector(HalfIdx1);
   SDValue Half2 = getHalfVector(HalfIdx2);
   SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  if (UseConcat) {
+    SDValue Op0 = V;
+    SDValue Op1 = DAG.getUNDEF(HalfVT);
+    if (UndefLower)
+      std::swap(Op0, Op1);
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1);
+  }
+
   unsigned Offset = UndefLower ? HalfNumElts : 0;
   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
                      DAG.getIntPtrConstant(Offset, DL));
@@ -33974,7 +33982,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
   // the wide shuffle that we started with.
   return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
                                Shuf->getOperand(1), HalfMask, HalfIdx1,
-                               HalfIdx2, false, DAG);
+                               HalfIdx2, false, DAG, /*UseConcat*/true);
 }
 
 static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
-- 
2.40.0