From bf9abd54ab40fe0c58eed09868e2fa8ff3e1d8a0 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 22 Jan 2019 19:02:30 +0000 Subject: [PATCH] Merging r351753: ------------------------------------------------------------------------ r351753 | spatel | 2019-01-21 18:30:14 +0100 (Mon, 21 Jan 2019) | 8 lines [DAGCombiner] fix crash when converting build vector to shuffle The regression test is reduced from the example shown in D56281. This does raise a question as noted in the test file: do we want to handle this pattern? I don't have a motivating example for that on x86 yet, but it seems like we could have that pattern there too, so we could avoid the back-and-forth using a shuffle. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@351857 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 +++++++++----- test/CodeGen/AArch64/build-vector-extract.ll | 22 ++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) create mode 100644 test/CodeGen/AArch64/build-vector-extract.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index ff5505c9772..6af01423ca1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16214,23 +16214,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // The build vector contains some number of undef elements and exactly // one other element. That other element must be a zero-extended scalar // extracted from a vector at a constant index to turn this into a shuffle. + // Also, require that the build vector does not implicitly truncate/extend + // its elements. // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND. + EVT VT = BV->getValueType(0); SDValue Zext = BV->getOperand(ZextElt); if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() || Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa(Zext.getOperand(0).getOperand(1))) + !isa(Zext.getOperand(0).getOperand(1)) || + Zext.getValueSizeInBits() != VT.getScalarSizeInBits()) return SDValue(); - // The zero-extend must be a multiple of the source size. + // The zero-extend must be a multiple of the source size, and we must be + // building a vector of the same size as the source of the extract element. SDValue Extract = Zext.getOperand(0); unsigned DestSize = Zext.getValueSizeInBits(); unsigned SrcSize = Extract.getValueSizeInBits(); - if (DestSize % SrcSize != 0) + if (DestSize % SrcSize != 0 || + Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits()) return SDValue(); // Create a shuffle mask that will combine the extracted element with zeros // and undefs. - int ZextRatio = DestSize / SrcSize; + int ZextRatio = DestSize / SrcSize; int NumMaskElts = NumBVOps * ZextRatio; SmallVector ShufMask(NumMaskElts, -1); for (int i = 0; i != NumMaskElts; ++i) { @@ -16260,7 +16266,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, ShufMask); - return DAG.getBitcast(BV->getValueType(0), Shuf); + return DAG.getBitcast(VT, Shuf); } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT diff --git a/test/CodeGen/AArch64/build-vector-extract.ll b/test/CodeGen/AArch64/build-vector-extract.ll new file mode 100644 index 00000000000..bba3a22cf33 --- /dev/null +++ b/test/CodeGen/AArch64/build-vector-extract.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s + +; This would crash because we did not expect to create +; a shuffle for a vector where the source operand is +; not the same size as the result. +; TODO: Should we handle this pattern? Ie, is moving to/from +; registers the optimal code? + +define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) { +; CHECK-LABEL: larger_bv_than_source: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ret + %t1 = extractelement <4 x i16> %t0, i32 2 + %vgetq_lane = zext i16 %t1 to i32 + %t2 = insertelement <4 x i32> undef, i32 %vgetq_lane, i64 0 + ret <4 x i32> %t2 +} + -- 2.50.1