return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
}
+/// Fold an insertelement of X into a zero-element splat shuffle of X by setting
+/// that lane's mask element to 0. Returns the new shuffle, else nullptr.
+static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
+ // The vector operand of this insert must be a canonical (zero-element) splat.
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
+ if (!Shuf || !Shuf->isZeroEltSplat())
+ return nullptr;
+
+ // The insertion index (operand 2) must be a constant integer.
+ uint64_t IdxC; // Written by m_ConstantInt when the match succeeds.
+ if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
+ return nullptr;
+
+ // Check if the splat shuffle's input is the same as this insert's scalar op.
+ Value *X = InsElt.getOperand(1); // Scalar being inserted.
+ Value *Op0 = Shuf->getOperand(0); // Must be: inselt undef, X, 0
+ if (!match(Op0, m_InsertElement(m_Undef(), m_Specific(X), m_ZeroInt())))
+ return nullptr;
+
+ // Replace the shuffle mask element at the index of this insert with a zero.
+ // Other lanes keep their mask (an out-of-range index changes none), e.g.:
+ // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ unsigned NumMaskElts = Shuf->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewMaskVec(NumMaskElts);
+ Type *I32Ty = IntegerType::getInt32Ty(Shuf->getContext());
+ Constant *Zero = ConstantInt::getNullValue(I32Ty); // i32 0 mask element.
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ NewMaskVec[i] = i == IdxC ? Zero : Shuf->getMask()->getAggregateElement(i);
+
+ Constant *NewMask = ConstantVector::get(NewMaskVec);
+ return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+}
+
/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
return Broadcast;
+ if (Instruction *Splat = foldInsEltIntoSplat(IE))
+ return Splat;
+
return nullptr;
}
}
; Multiple undef elements are ok.
-; TODO: Multiple uses triggers the transform at %t4, but we could form another splat from %t6 and simplify?
+; TODO: Multiple uses trigger the transform at %t4, but we should sink/scalarize/CSE the splats?
define <4 x float> @splat_undef3(float %arg) {
; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
-; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
-; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]
; CHECK-NEXT: ret <4 x float> [[T7]]
;
define <4 x float> @insert_in_splat(float %x) {
; CHECK-LABEL: @insert_in_splat(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
-; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> undef, float %x, i32 0
; CHECK-NEXT: call void @use(<4 x float> [[XV]])
; CHECK-NEXT: [[SPLAT:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 undef>
; CHECK-NEXT: call void @use(<4 x float> [[SPLAT]])
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[SPLAT]], float [[X]], i32 3
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[XV]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%xv = insertelement <4 x float> undef, float %x, i32 0
ret <4 x float> %r
}
+; Negative test - not a constant index insert
+
define <4 x float> @insert_in_splat_variable_index(float %x, i32 %y) {
; CHECK-LABEL: @insert_in_splat_variable_index(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
ret <4 x float> %r
}
+; Negative test - not a splat shuffle
+
define <4 x float> @insert_in_nonsplat(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 0
ret <4 x float> %r
}
+; Negative test - not a splat shuffle
+
define <4 x float> @insert_in_nonsplat2(float %x, <4 x float> %y) {
; CHECK-LABEL: @insert_in_nonsplat2(
; CHECK-NEXT: [[XV:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[X:%.*]], i32 0