/// \brief Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
+/// \brief Match an instruction, capturing it if we match.
+inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
+
/// \brief Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
}
+//===----------------------------------------------------------------------===//
+// Matchers for overflow check patterns: e.g. (a + b) u< a
+//
+
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+struct UAddWithOverflow_match {
+ LHS_t L;
+ RHS_t R;
+ Sum_t S;
+
+ UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
+ : L(L), R(R), S(S) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
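+    // First match any icmp, capturing its predicate and operands.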
+ Value *ICmpLHS, *ICmpRHS;
+ ICmpInst::Predicate Pred;
+ if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
+ return false;
+
+ Value *AddLHS, *AddRHS;
+ auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
+
+ // (a + b) u< a, (a + b) u< b
+ if (Pred == ICmpInst::ICMP_ULT)
+ if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
+
+  // a u> (a + b), b u> (a + b)
+ if (Pred == ICmpInst::ICMP_UGT)
+ if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
+ return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
+
+ return false;
+ }
+};
+
+/// \brief Match an icmp instruction checking for unsigned overflow on addition.
+///
+/// S is matched to the addition whose result is being checked for overflow, and
+/// L and R are matched to the LHS and RHS of S.
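+///
+/// A typical use, mirroring the callers added in this patch:
+/// \code
+///   Value *A, *B;
+///   Instruction *AddI;
+///   if (match(CI, m_UAddWithOverflow(m_Value(A), m_Value(B),
+///                                    m_Instruction(AddI))))
+///     ...; // A and B are the add's operands; AddI is the add itself.
+/// \endcode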
+template <typename LHS_t, typename RHS_t, typename Sum_t>
+UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
+m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
+ return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
+}
+
/// \brief Match an 'unordered' floating point minimum function.
/// Floating point has one special value 'NaN'. Therefore, there is no total
/// order. However, if we can ignore the 'NaN' value (for example, because of a
return SinkCast(CI);
}
-/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
+/// CombineUAddWithOverflow - try to combine CI, together with the add whose
+/// overflow it checks, into a call to the llvm.uadd.with.overflow intrinsic
+/// if possible.
+///
+/// Return true if any changes were made.
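+///
+/// As an illustrative sketch (value names chosen to match the code below),
+/// this rewrites
+///   %add = add i64 %a, %b
+///   %cmp = icmp ult i64 %add, %a
+/// into
+///   %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a,
+///                                                                  i64 %b)
+///   %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
+///   %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
+/// with %add's uses rewritten to %uadd and %cmp's uses to %overflow.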
+static bool CombineUAddWithOverflow(CmpInst *CI) {
+ Value *A, *B;
+ Instruction *AddI;
+ if (!match(CI,
+ m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+ return false;
+
+ Type *Ty = AddI->getType();
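+  // Only integer adds are handled; bail out for pointers and vectors.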
+ if (!isa<IntegerType>(Ty))
+ return false;
+
+  // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp:
+
+ if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ return false;
+
+#ifndef NDEBUG
+ // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
+ // for now:
+ if (AddI->hasOneUse())
+    assert(*AddI->user_begin() == CI && "expected CI to be AddI's only user");
+#endif
+
+ Module *M = CI->getParent()->getParent()->getParent();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
+
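+  // If the cmp is the add's only user we can sink the call down to the cmp;
+  // otherwise insert at the add so the call dominates its remaining users.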
+ auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
+
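+  // The intrinsic returns a {Ty, i1} pair: element 0 is the sum and
+  // element 1 is the overflow bit.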
+ auto *UAddWithOverflow =
+ CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
+ auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
+ auto *Overflow =
+ ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
+
+ CI->replaceAllUsesWith(Overflow);
+ AddI->replaceAllUsesWith(UAdd);
+ CI->eraseFromParent();
+ AddI->eraseFromParent();
+ return true;
+}
+
+/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool OptimizeCmpExpression(CmpInst *CI) {
+static bool SinkCmpExpression(CmpInst *CI) {
BasicBlock *DefBB = CI->getParent();
/// InsertedCmp - Only insert a cmp in each block once.
return MadeChange;
}
+static bool OptimizeCmpExpression(CmpInst *CI) {
+ if (SinkCmpExpression(CI))
+ return true;
+
+ if (CombineUAddWithOverflow(CI))
+ return true;
+
+ return false;
+}
+
/// isExtractBitsCandidateUse - Check if the candidates could
/// be combined with shift instruction, which includes:
/// 1. Truncate instruction
return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}
-static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
- InstCombiner &IC) {
- // Don't bother doing this transformation for pointers, don't do it for
- // vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
-
- // If the add is a constant expr, then we don't bother transforming it.
- Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (!OrigAdd) return nullptr;
-
- Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
-
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- InstCombiner::BuilderTy *Builder = IC.Builder;
- Builder->SetInsertPoint(OrigAdd);
-
- Module *M = I.getParent()->getParent()->getParent();
- Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
- CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
- Value *Add = Builder->CreateExtractValue(Call, 0);
-
- IC.ReplaceInstUsesWith(*OrigAdd, Add);
-
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "uadd.overflow");
-}
-
bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
Value *RHS, Instruction &OrigI,
Value *&Result, Constant *&Overflow) {
return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
}
- // (a+b) <u a --> llvm.uadd.with.overflow.
- // (a+b) <u b --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_ULT &&
- match(Op0, m_Add(m_Value(A), m_Value(B))) &&
- (Op1 == A || Op1 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op0, *this))
- return R;
-
- // a >u (a+b) --> llvm.uadd.with.overflow.
- // b >u (a+b) --> llvm.uadd.with.overflow.
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op1, m_Add(m_Value(A), m_Value(B))) &&
- (Op0 == A || Op0 == B))
- if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
- return R;
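+  // (a+b) u< a, a u> (a+b), and commuted forms --> llvm.uadd.with.overflow.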
+ Instruction *AddI = nullptr;
+ if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
+ m_Instruction(AddI))) &&
+ isa<IntegerType>(A->getType())) {
+ Value *Result;
+ Constant *Overflow;
+ if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
+ Overflow)) {
+ ReplaceInstUsesWith(*AddI, Result);
+ return ReplaceInstUsesWith(I, Overflow);
+ }
+ }
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
define i32 @test1(i32 %sum, i32 %x) nounwind readnone ssp {
entry:
; CHECK-LABEL: test1:
-; CHECK: cmpl %ecx, %eax
-; CHECK-NOT: addl
-; CHECK: adcl $0, %eax
- %add4 = add i32 %x, %sum
- %cmp = icmp ult i32 %add4, %x
- %inc = zext i1 %cmp to i32
- %z.0 = add i32 %add4, %inc
- ret i32 %z.0
-}
-
-; Instcombine transforms test1 into test2:
-; CHECK-LABEL: test2:
; CHECK: movl
; CHECK-NEXT: addl
; CHECK-NEXT: adcl $0
; CHECK-NEXT: ret
-define i32 @test2(i32 %sum, i32 %x) nounwind readnone ssp {
-entry:
- %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %sum)
- %0 = extractvalue { i32, i1 } %uadd, 0
- %cmp = extractvalue { i32, i1 } %uadd, 1
+ %add4 = add i32 %x, %sum
+ %cmp = icmp ult i32 %add4, %x
%inc = zext i1 %cmp to i32
- %z.0 = add i32 %0, %inc
+ %z.0 = add i32 %add4, %inc
ret i32 %z.0
}
; <rdar://problem/12579915>
-define i32 @test3(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
+define i32 @test2(i32 %x, i32 %y, i32 %res) nounwind uwtable readnone ssp {
entry:
%cmp = icmp ugt i32 %x, %y
%dec = sext i1 %cmp to i32
%dec.res = add nsw i32 %dec, %res
ret i32 %dec.res
-; CHECK-LABEL: test3:
+; CHECK-LABEL: test2:
; CHECK: cmpl
; CHECK: sbbl
; CHECK: ret
}
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
--- /dev/null
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK-LABEL: @test1(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %a
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test2(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ult i64 %add, %b
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test3(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: ret i64
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+}
+
+; CHECK-LABEL: @test4(
+; CHECK: llvm.uadd.with.overflow
+; CHECK: extractvalue
+; CHECK: extractvalue
+; CHECK: select
+define i64 @test4(i64 %a, i64 %b, i1 %c) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+ next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+ exit:
+ ret i64 0
+}
+
+; CHECK-LABEL: @test5(
+; CHECK-NOT: llvm.uadd.with.overflow
+; CHECK: next
+define i64 @test5(i64 %a, i64 %b, i64* %ptr, i1 %c) nounwind ssp {
+entry:
+ %add = add i64 %b, %a
+ store i64 %add, i64* %ptr
+ %cmp = icmp ugt i64 %b, %add
+ br i1 %c, label %next, label %exit
+
+ next:
+ %Q = select i1 %cmp, i64 %b, i64 42
+ ret i64 %Q
+
+ exit:
+ ret i64 0
+}
; CHECK: ret i8
}
-; CHECK-LABEL: @test5(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test5(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ult i64 %add, %a
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
-; CHECK-LABEL: @test6(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test6(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ult i64 %add, %b
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
-; CHECK-LABEL: @test7(
-; CHECK: llvm.uadd.with.overflow
-; CHECK: ret i64
-define i64 @test7(i64 %a, i64 %b) nounwind ssp {
-entry:
- %add = add i64 %b, %a
- %cmp = icmp ugt i64 %b, %add
- %Q = select i1 %cmp, i64 %b, i64 42
- ret i64 %Q
-}
-
; CHECK-LABEL: @test8(
; PR11438
; This is @test1, but the operands are not sign-extended. Make sure