From a2a6bc5fb1c83aafdf1af76dbf913143be3e2737 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Mon, 8 Jul 2019 19:48:52 +0000
Subject: [PATCH] [InstCombine] fold insertelement into splat of same scalar

Forming the canonical splat shuffle improves analysis and may allow
follow-on transforms (although some possibilities are missing as shown
in the test diffs).

The backend generically turns these patterns into build_vector, so
there should be no codegen regressions. All targets are expected to be
able to lower splats efficiently.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365379 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineVectorOps.cpp      | 37 +++++++++++++++++++
 test/Transforms/InstCombine/broadcast.ll      |  5 +--
 .../InstCombine/insert-extract-shuffle.ll     | 11 ++++--
 3 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index b928ae5b3d1..dc9abdd7f47 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -732,6 +732,40 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
   return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
 }
 
+/// Try to fold an insert element into an existing splat shuffle by changing
+/// the shuffle's mask to include the index of this insert element.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+  // Check if the vector operand of this insert is a canonical splat shuffle.
+  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+  if (!Shuf || !Shuf->isZeroEltSplat())
+    return nullptr;
+
+  // Check for a constant insertion index.
+  uint64_t IdxC;
+  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+    return nullptr;
+
+  // Check if the splat shuffle's input is the same as this insert's scalar op.
+  Value *X = InsElt.getOperand(1);
+  Value *Op0 = Shuf->getOperand(0);
+  if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
+    return nullptr;
+
+  // Replace the shuffle mask element at the index of this insert with a zero.
+  // For example:
+  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+  //   --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+  unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+  Constant *Zero = ConstantInt::getNullValue(I32Ty);
+  for (unsigned i = 0; i != NumMaskElts; ++i)
+    NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
+
+  Constant *NewMask = ConstantVector::get(NewMaskVec);
+  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+}
+
 /// If we have an insertelement instruction feeding into another insertelement
 /// and the 2nd is inserting a constant into the vector, canonicalize that
 /// constant insertion before the insertion of a variable:
@@ -950,6 +984,9 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
   if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
     return Broadcast;
 
+  if (Instruction *Splat = foldInsEltIntoSplat(IE))
+    return Splat;
+
   return nullptr;
 }
 
diff --git a/test/Transforms/InstCombine/broadcast.ll b/test/Transforms/InstCombine/broadcast.ll
index 30c552cb1bd..49ff3129a68 100644
--- a/test/Transforms/InstCombine/broadcast.ll
+++ b/test/Transforms/InstCombine/broadcast.ll
@@ -126,14 +126,13 @@ define <1 x float> @bad4(float %arg) {
 }
 
 ; Multiple undef elements are ok.
-; TODO: Multiple uses triggers the transform at %t4, but we could form another splat from %t6 and simplify?
+; TODO: Multiple uses triggers the transform at %t4, but we should sink/scalarize/CSE the splats?
define <4 x float> @splat_undef3(float %arg) { ; CHECK-LABEL: @splat_undef3( ; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0 ; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2 -; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3 +; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]] ; CHECK-NEXT: ret <4 x float> [[T7]] ; diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll index 7603298c26c..3a17acd2432 100644 --- a/test/Transforms/InstCombine/insert-extract-shuffle.ll +++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -502,8 +502,7 @@ define <4 x float> @insert_nonzero_index_splat_wrong_index(float %x, i32 %index) define <4 x float> @insert_in_splat(float %x) { ; CHECK-LABEL: @insert_in_splat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0 -; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -518,7 +517,7 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { ; CHECK-NEXT: call void @use(<4 x float> [[XV]]) ; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]]) -; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3 +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x 
float> [[R]] ; %xv = insertelement <4 x float> undef, float %x, i32 0 @@ -529,6 +528,8 @@ define <4 x float> @insert_in_splat_extra_uses(float %x) { ret <4 x float> %r } +; Negative test - not a constant index insert + define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { ; CHECK-LABEL: @insert_in_splat_variable_index( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0 @@ -542,6 +543,8 @@ define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) { ret <4 x float> %r } +; Negative test - not a splat shuffle + define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0 @@ -555,6 +558,8 @@ define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) { ret <4 x float> %r } +; Negative test - not a splat shuffle + define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) { ; CHECK-LABEL: @insert_in_nonsplat2( ; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0 -- 2.40.0