Optional<LoopICmp> parseLoopLatchICmp();
+ /// Return an insertion point suitable for inserting a safe to speculate
+ /// instruction whose only user will be 'User' which has operands 'Ops'. A
+ /// trivial result would be at the User itself, but we try to return a
+ /// loop invariant location if possible.
+ Instruction *findInsertPt(Instruction *User, ArrayRef<Value*> Ops);
+
bool CanExpand(const SCEV* S);
Value *expandCheck(SCEVExpander &Expander, IRBuilder<> &Builder,
ICmpInst::Predicate Pred, const SCEV *LHS,
return Step->isOne() || (Step->isAllOnesValue() && EnableCountDownLoop);
}
+Instruction *LoopPredication::findInsertPt(Instruction *Use, // fallback insertion point
+ ArrayRef<Value*> Ops) { // values checked for invariance in loop L
+ for (Value *Op : Ops) // examine every value in Ops
+ if (!L->isLoopInvariant(Op))
+ return Use; // some value is not invariant in L: insert at Use itself
+ return Preheader->getTerminator(); // all values invariant (or Ops empty): use the preheader's terminator
+}
+
bool LoopPredication::CanExpand(const SCEV* S) {
  // Reject anything that is not invariant with respect to loop L; the
  // safety query below is only made for loop-invariant expressions.
  if (!SE->isLoopInvariant(S, L))
    return false;
  // Invariant: the answer is whatever isSafeToExpand reports for S.
  return isSafeToExpand(S, *SE);
}
TotalWidened += NumWidened;
// Emit the new guard condition
- Builder.SetInsertPoint(Guard);
+ Builder.SetInsertPoint(findInsertPt(Guard, Checks));
Value *LastCheck = nullptr;
for (auto *Check : Checks)
if (!LastCheck)
TotalWidened += NumWidened;
// Emit the new guard condition
- Builder.SetInsertPoint(BI);
+ Builder.SetInsertPoint(findInsertPt(BI, Checks));
Value *LastCheck = nullptr;
for (auto *Check : Checks)
if (!LastCheck)
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 0, [[LENGTH_1]]
; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP4]], [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[TMP5]]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP6]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i32 [[N]], [[LENGTH_1:%.*]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 0, [[LENGTH_1]]
; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[TMP8]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = and i1 [[TMP2]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = and i1 [[TMP9]], [[TMP8]]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP10]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i32 16, [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_TRUNC_16:%.*]] = trunc i64 [[IV]] to i16
; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC_32]], [[OFFA]]
; CHECK-NEXT: [[INDEXB:%.*]] = add i16 [[IV_TRUNC_16]], [[OFFB]]
-; CHECK-NEXT: [[TMP8:%.*]] = and i1 [[TMP3]], [[TMP7]]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP8]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule i32 15, [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[OFFA]], [[LENGTHA]]
; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP12]], [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]]
+; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[INDEXA:%.*]] = add i32 [[IV_TRUNC]], [[OFFA]]
; CHECK-NEXT: [[INDEXB:%.*]] = add i32 [[IV_TRUNC]], [[OFFB]]
-; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP4]], [[TMP8]]
-; CHECK-NEXT: [[TMP15:%.*]] = and i1 [[TMP14]], [[TMP13]]
; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP15]], i32 9) [ "deopt"() ]
; CHECK-NEXT: [[INDEXA_EXT:%.*]] = zext i32 [[INDEXA]] to i64
; CHECK-NEXT: [[ADDRA:%.*]] = getelementptr inbounds i8, i8* [[ARRA]], i64 [[INDEXA_EXT]]