VPWidenIntOrFpInductionRecipe *tryToOptimizeInduction(Instruction *I,
VFRange &Range);
- /// Check if \I can be widened within the given VF \p Range. If \I can be
- /// widened for Range.Start, extend \p LastWidenRecipe to include \p I if
- /// possible or else build a new VPWidenRecipe for it, and return the
- /// VPWidenRecipe that includes \p I. If \p I cannot be widened for
- /// Range.Start \return null. Range.End may be decreased to ensure same
- /// decision from \p Range.Start to \p Range.End.
- VPWidenRecipe *tryToWiden(Instruction *I, VPWidenRecipe *LastWidenRecipe,
- VFRange &Range);
+ /// Check if \p I can be widened within the given VF \p Range. If \p I can be
+ /// widened for \p Range.Start, either extend the last recipe of \p VPBB to
+ /// include \p I, if that recipe is a VPWidenRecipe that accepts it, or else
+ /// build a new VPWidenRecipe for \p I and append it to \p VPBB. \return true
+ /// if \p I can be widened for \p Range.Start, false otherwise. \p Range.End
+ /// may be decreased to ensure the same decision from \p Range.Start to
+ /// \p Range.End.
+ bool tryToWiden(Instruction *I, VPBasicBlock *VPBB, VFRange &Range);
/// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
/// is predicated. \return \p VPBB augmented with this new recipe if \p I is
return nullptr;
}
-VPWidenRecipe *LoopVectorizationPlanner::tryToWiden(
- Instruction *I, VPWidenRecipe *LastWidenRecipe, VFRange &Range) {
+bool LoopVectorizationPlanner::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
+ VFRange &Range) {
if (Legal->isScalarWithPredication(I))
- return nullptr;
+ return false;
auto IsVectorizableOpcode = [](unsigned Opcode) {
switch (Opcode) {
};
if (!IsVectorizableOpcode(I->getOpcode()))
- return nullptr;
+ return false;
if (CallInst *CI = dyn_cast<CallInst>(I)) {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end ||
ID == Intrinsic::lifetime_start))
- return nullptr;
+ return false;
}
auto willWiden = [&](unsigned VF) -> bool {
};
if (!getDecisionAndClampRange(willWiden, Range))
- return nullptr;
+ return false;
// Success: widen this instruction. We optimize the common case where
// consecutive instructions can be represented by a single recipe.
- if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
- return LastWidenRecipe;
- return new VPWidenRecipe(I);
+ if (!VPBB->empty()) {
+ VPWidenRecipe *LastWidenRecipe = dyn_cast<VPWidenRecipe>(&VPBB->back());
+ if (LastWidenRecipe && LastWidenRecipe->appendInstruction(I))
+ return true;
+ }
+
+ VPBB->appendRecipe(new VPWidenRecipe(I));
+ return true;
}
VPBasicBlock *LoopVectorizationPlanner::handleReplication(
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
VPBB->setOneSuccessor(FirstVPBBForBB);
VPBB = FirstVPBBForBB;
- VPWidenRecipe *LastWidenRecipe = nullptr;
std::vector<Instruction *> Ingredients;
// Check if Instr is to be widened by a general VPWidenRecipe, after
// having first checked for specific widening recipes that deal with
// Interleave Groups, Inductions and Phi nodes.
- if ((Recipe = tryToWiden(Instr, LastWidenRecipe, Range))) {
- if (Recipe != LastWidenRecipe)
- VPBB->appendRecipe(Recipe);
- LastWidenRecipe = cast<VPWidenRecipe>(Recipe);
+ if (tryToWiden(Instr, VPBB, Range))
continue;
- }
// Otherwise, if all widening options failed, Instruction is to be
// replicated. This may create a successor for VPBB.
ret void
}
+; PR34711: the loop below contains three consecutive instructions: the first
+; will be widened; the second is a cast that will be widened and needs to sink
+; after the third; and the third is a first-order-recurring load that will be
+; replicated instead of widened. Although the cast and the first instruction
+; will both be widened, and are originally adjacent to each other, make sure
+; the replicated load ends up appearing between them.
+;
+; void PR34711(short (*a)[2], int *b, int *c, int n) {
+; for(int i = 0; i < n; i++) {
+; c[i] = 7;
+; b[i] = (a[i][0] * a[i][1]);
+; }
+; }
+;
+; SINK-AFTER-LABEL: @PR34711
+; Check that the sext sank after the load in the vector loop.
+; SINK-AFTER: vector.body
+; SINK-AFTER: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ {{.*}}, %vector.body ]
+; SINK-AFTER: %[[VSHUF:.+]] = shufflevector <4 x i16> %vector.recur, <4 x i16> %{{.*}}, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER: %[[VCONV:.+]] = sext <4 x i16> %[[VSHUF]] to <4 x i32>
+; SINK-AFTER: %[[VCONV3:.+]] = sext <4 x i16> {{.*}} to <4 x i32>
+; SINK-AFTER: mul nsw <4 x i32> %[[VCONV3]], %[[VCONV]]
+;
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; PR34711 regression test: a sunk cast must stay after the non-widened load
+entry:
+ %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0 ; address of a[0][0]
+ %.pre = load i16, i16* %pre.index ; initial value fed to the recurrence phi %0
+ br label %for.body
+
+for.body:
+ %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ] ; first-order recurrence: the %1 loaded by the previous iteration
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] ; induction variable i, starting at 0
+ %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv ; address of c[i]
+ %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1 ; address of a[i][1]
+ store i32 7, i32* %arraycidx ; 1st instruction, to be widened.
+ %conv = sext i16 %0 to i32 ; 2nd, cast to sink after third.
+ %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
+ %conv3 = sext i16 %1 to i32 ; sign-extend the value loaded in this iteration
+ %mul = mul nsw i32 %conv3, %conv ; current load times the recurrence value
+ %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv ; address of b[i]
+ store i32 %mul, i32* %arrayidx5 ; b[i] = %mul
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n ; true once i + 1 == n
+ br i1 %exitcond, label %for.end, label %for.body ; leave the loop or take the back edge
+
+for.end:
+ ret void
+}
+
; void no_sink_after(short *a, int n, int *b) {
; for(int i = 0; i < n; i++)
; b[i] = ((a[i] + 2) * a[i + 1]);