From bf9abd54ab40fe0c58eed09868e2fa8ff3e1d8a0 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@hanshq.net>
Date: Tue, 22 Jan 2019 19:02:30 +0000
Subject: [PATCH] Merging r351753:
 ------------------------------------------------------------------------
 r351753 | spatel | 2019-01-21 18:30:14 +0100 (Mon, 21 Jan 2019) | 8 lines

[DAGCombiner] fix crash when converting build vector to shuffle

The regression test is reduced from the example shown in D56281.
This does raise a question as noted in the test file: do we want
to handle this pattern? I don't have a motivating example for
that on x86 yet, but it seems like we could have that pattern
there too, so we could avoid the back-and-forth using a shuffle.

------------------------------------------------------------------------


git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@351857 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp     | 16 +++++++++-----
 test/CodeGen/AArch64/build-vector-extract.ll | 22 ++++++++++++++++++++
 2 files changed, 33 insertions(+), 5 deletions(-)
 create mode 100644 test/CodeGen/AArch64/build-vector-extract.ll
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c9772..6af01423ca1 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   // The build vector contains some number of undef elements and exactly
   // one other element. That other element must be a zero-extended scalar
   // extracted from a vector at a constant index to turn this into a shuffle.
+  // Also, require that the build vector does not implicitly truncate/extend
+  // its elements.
   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+  EVT VT = BV->getValueType(0);
   SDValue Zext = BV->getOperand(ZextElt);
   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+      !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+      Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
     return SDValue();
 
-  // The zero-extend must be a multiple of the source size.
+  // The zero-extend must be a multiple of the source size, and we must be
+  // building a vector of the same size as the source of the extract element.
   SDValue Extract = Zext.getOperand(0);
   unsigned DestSize = Zext.getValueSizeInBits();
   unsigned SrcSize = Extract.getValueSizeInBits();
-  if (DestSize % SrcSize != 0)
+  if (DestSize % SrcSize != 0 ||
+      Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
     return SDValue();
 
   // Create a shuffle mask that will combine the extracted element with zeros
   // and undefs.
-  int ZextRatio =  DestSize / SrcSize;
+  int ZextRatio = DestSize / SrcSize;
   int NumMaskElts = NumBVOps * ZextRatio;
   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
   for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
   SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
                                       ShufMask);
-  return DAG.getBitcast(BV->getValueType(0), Shuf);
+  return DAG.getBitcast(VT, Shuf);
 }
 
 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
diff --git a/test/CodeGen/AArch64/build-vector-extract.ll b/test/CodeGen/AArch64/build-vector-extract.ll
new file mode 100644
index 00000000000..bba3a22cf33
--- /dev/null
+++ b/test/CodeGen/AArch64/build-vector-extract.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
+
+; This would crash because we did not expect to create
+; a shuffle for a vector where the source operand is
+; not the same size as the result.
+; TODO: Should we handle this pattern? I.e., is moving to/from
+; registers the optimal code?
+
+define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
+; CHECK-LABEL: larger_bv_than_source:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    umov w8, v0.h[2]
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %t1 = extractelement <4 x i16> %t0, i32 2
+  %vgetq_lane = zext i16 %t1 to i32
+  %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0
+  ret <4 x i32> %t2
+}
+
-- 
2.50.1