CurrIE = NextIE;
}
- // Make sure we've seen an insert into every element.
- if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+ // If this is just a single insertelement (not a sequence), we are done.
+ if (FirstIE == &InsElt)
return nullptr;
+ // If we are not inserting into an undef vector, make sure we've seen an
+ // insert into every element.
+ // TODO: If the base vector is not undef, it might be better to create a splat
+ // and then a select-shuffle (blend) with the base vector.
+ if (!isa<UndefValue>(FirstIE->getOperand(0)))
+ if (any_of(ElementPresent, [](bool Present) { return !Present; }))
+ return nullptr;
+
// Create the insert + shuffle.
Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
UndefValue *UndefVec = UndefValue::get(VecTy);
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
- Constant *ZeroMask = ConstantVector::getSplat(NumElements, Zero);
- return new ShuffleVectorInst(FirstIE, UndefVec, ZeroMask);
+ // Splat from element 0, but replace absent elements with undef in the mask.
+ SmallVector<Constant *, 16> Mask(NumElements, Zero);
+ for (unsigned i = 0; i != NumElements; ++i)
+ if (!ElementPresent[i])
+ Mask[i] = UndefValue::get(Int32Ty);
+
+ return new ShuffleVectorInst(FirstIE, UndefVec, ConstantVector::get(Mask));
}
/// If we have an insertelement instruction feeding into another insertelement
ret <4 x float> %res
}
-define <4 x float> @bad1(float %arg) {
-; CHECK-LABEL: @bad1(
-; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 1
-; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
-; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; The insert is changed to allow the canonical shuffle-splat pattern from element 0.
+
+define <4 x float> @splat_undef1(float %arg) {
+; CHECK-LABEL: @splat_undef1(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
+; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 undef, i32 0, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> undef, float %arg, i32 1
ret <4 x float> %t6
}
-define <4 x float> @bad2(float %arg) {
-; CHECK-LABEL: @bad2(
+; Re-uses the existing first insertelement.
+
+define <4 x float> @splat_undef2(float %arg) {
+; CHECK-LABEL: @splat_undef2(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
-; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 2
-; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
+; CHECK-NEXT: [[T6:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 undef, i32 0, i32 0>
; CHECK-NEXT: ret <4 x float> [[T6]]
;
%t = insertelement <4 x float> undef, float %arg, i32 0
ret <1 x float> %t
}
-define <4 x float> @bad5(float %arg) {
-; CHECK-LABEL: @bad5(
+; Multiple undef elements are ok.
+; TODO: Multiple uses trigger the transform at %t4, but we could form another splat from %t6 and simplify?
+
+define <4 x float> @splat_undef3(float %arg) {
+; CHECK-LABEL: @splat_undef3(
; CHECK-NEXT: [[T:%.*]] = insertelement <4 x float> undef, float [[ARG:%.*]], i32 0
-; CHECK-NEXT: [[T4:%.*]] = insertelement <4 x float> [[T]], float [[ARG]], i32 1
+; CHECK-NEXT: [[T4:%.*]] = shufflevector <4 x float> [[T]], <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
; CHECK-NEXT: [[T5:%.*]] = insertelement <4 x float> [[T4]], float [[ARG]], i32 2
; CHECK-NEXT: [[T6:%.*]] = insertelement <4 x float> [[T5]], float [[ARG]], i32 3
; CHECK-NEXT: [[T7:%.*]] = fadd <4 x float> [[T6]], [[T4]]