From: Florian Hahn Date: Wed, 30 Aug 2017 10:54:21 +0000 (+0000) Subject: [InstCombine] Fold insert sequence if first ins has multiple users. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=785320780e1e0af846d0cf3ad25248bf4b35efab;p=llvm [InstCombine] Fold insert sequence if first ins has multiple users. Summary: If the first insertelement instruction has multiple users and inserts at position 0, we can re-use this instruction when folding a chain of insertelement instructions. As we need to generate the first insertelement instruction anyway, this should be a strict improvement. We could get rid of the restriction of inserting at position 0 by creating a different shufflemask, but it is probably worth keeping the first insertelement instruction with position 0, as this is easier to do efficiently than at other positions, I think. Reviewers: grosser, mkuper, fpetrogalli, efriedma Reviewed By: fpetrogalli Subscribers: gareevroman, llvm-commits Differential Revision: https://reviews.llvm.org/D37064 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312110 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index dd71a31b644..7380ec21cae 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -615,6 +615,7 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) { Value *SplatVal = InsElt.getOperand(1); InsertElementInst *CurrIE = &InsElt; SmallVector<bool, 16> ElementPresent(NumElements, false); + InsertElementInst *FirstIE = nullptr; // Walk the chain backwards, keeping track of which indices we inserted into, // until we hit something that isn't an insert of the splatted value. 
@@ -623,12 +624,18 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) { if (!Idx || CurrIE->getOperand(1) != SplatVal) return nullptr; - // Check none of the intermediate steps have any additional uses. - if ((CurrIE != &InsElt) && !CurrIE->hasOneUse()) + InsertElementInst *NextIE = + dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); + // Check none of the intermediate steps have any additional uses, except + // for the root insertelement instruction, which can be re-used, if it + // inserts at position 0. + if (CurrIE != &InsElt && + (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero()))) return nullptr; ElementPresent[Idx->getZExtValue()] = true; - CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0)); + FirstIE = CurrIE; + CurrIE = NextIE; } // Make sure we've seen an insert into every element. @@ -636,9 +643,14 @@ static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) { return nullptr; // All right, create the insert + shuffle. - Instruction *InsertFirst = InsertElementInst::Create( - UndefValue::get(VT), SplatVal, - ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), "", &InsElt); + Instruction *InsertFirst; + if (cast<ConstantInt>(FirstIE->getOperand(2))->isZero()) + InsertFirst = FirstIE; + else + InsertFirst = InsertElementInst::Create( + UndefValue::get(VT), SplatVal, + ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), + "", &InsElt); Constant *ZeroMask = ConstantAggregateZero::get( VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements)); diff --git a/test/Transforms/InstCombine/broadcast.ll b/test/Transforms/InstCombine/broadcast.ll index d852e2970fc..8485cd9c53f 100644 --- a/test/Transforms/InstCombine/broadcast.ll +++ b/test/Transforms/InstCombine/broadcast.ll @@ -51,6 +51,22 @@ define <4 x float> @good4(float %arg) { ret <4 x float> %tmp7 } +; CHECK-LABEL: @good5( +; CHECK-NEXT: %ins1 = insertelement <4 x float> undef, float %v, i32 0 +; CHECK-NEXT: %a1 = fadd <4 x float> %ins1, %ins1 +; CHECK-NEXT: %ins4 = 
shufflevector <4 x float> %ins1, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK-NEXT: %res = fadd <4 x float> %a1, %ins4 +; CHECK-NEXT: ret <4 x float> %res +define <4 x float> @good5(float %v) { + %ins1 = insertelement <4 x float> undef, float %v, i32 0 + %a1 = fadd <4 x float> %ins1, %ins1 + %ins2 = insertelement<4 x float> %ins1, float %v, i32 1 + %ins3 = insertelement<4 x float> %ins2, float %v, i32 2 + %ins4 = insertelement<4 x float> %ins3, float %v, i32 3 + %res = fadd <4 x float> %a1, %ins4 + ret <4 x float> %res +} + ; CHECK-LABEL: bad1 ; CHECK-NOT: shufflevector define <4 x float> @bad1(float %arg) { @@ -107,3 +123,15 @@ define <4 x float> @bad6(float %arg, i32 %k) { %tmp6 = insertelement <4 x float> %tmp5, float %arg, i32 3 ret <4 x float> %tmp6 } + +; CHECK-LABEL: @bad7( +; CHECK-NOT: shufflevector +define <4 x float> @bad7(float %v) { + %ins1 = insertelement <4 x float> undef, float %v, i32 1 + %a1 = fadd <4 x float> %ins1, %ins1 + %ins2 = insertelement<4 x float> %ins1, float %v, i32 2 + %ins3 = insertelement<4 x float> %ins2, float %v, i32 3 + %ins4 = insertelement<4 x float> %ins3, float %v, i32 0 + %res = fadd <4 x float> %a1, %ins4 + ret <4 x float> %res +}