From ba0f61cf5363f80e3241dc754235dfb246afe320 Mon Sep 17 00:00:00 2001 From: Jordy Rose Date: Fri, 18 Jun 2010 22:49:11 +0000 Subject: [PATCH] Fold additive constants, and support comparsions of the form $sym+const1 <> const2 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@106339 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/Expr.h | 3 +- lib/Checker/BasicConstraintManager.cpp | 124 ++++---- lib/Checker/RangeConstraintManager.cpp | 301 +++++++++++------- lib/Checker/SimpleConstraintManager.cpp | 157 ++++++--- lib/Checker/SimpleConstraintManager.h | 26 +- lib/Checker/SimpleSValuator.cpp | 125 +++++--- .../additive-folding-range-constraints.c | 99 ++++++ test/Analysis/additive-folding.c | 163 ++++++++++ 8 files changed, 726 insertions(+), 272 deletions(-) create mode 100644 test/Analysis/additive-folding-range-constraints.c create mode 100644 test/Analysis/additive-folding.c diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index 248cf7bbfd..912894ee80 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -2169,7 +2169,8 @@ public: /// predicates to categorize the respective opcodes. bool isMultiplicativeOp() const { return Opc >= Mul && Opc <= Rem; } - bool isAdditiveOp() const { return Opc == Add || Opc == Sub; } + static bool isAdditiveOp(Opcode Opc) { return Opc == Add || Opc == Sub; } + bool isAdditiveOp() const { return isAdditiveOp(Opc); } static bool isShiftOp(Opcode Opc) { return Opc == Shl || Opc == Shr; } bool isShiftOp() const { return isShiftOp(Opc); } diff --git a/lib/Checker/BasicConstraintManager.cpp b/lib/Checker/BasicConstraintManager.cpp index e89546ecb0..eee5c59466 100644 --- a/lib/Checker/BasicConstraintManager.cpp +++ b/lib/Checker/BasicConstraintManager.cpp @@ -54,22 +54,28 @@ public: ISetFactory(statemgr.getAllocator()) {} const GRState* AssumeSymNE(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AssumeSymEQ(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AssumeSymLT(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AssumeSymGT(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AssumeSymGE(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AssumeSymLE(const GRState* state, SymbolRef sym, - const llvm::APSInt& V); + const llvm::APSInt& V, + const llvm::APSInt& Adjustment); const GRState* AddEQ(const GRState* state, SymbolRef sym, const llvm::APSInt& V); @@ -94,46 +100,52 @@ ConstraintManager* clang::CreateBasicConstraintManager(GRStateManager& statemgr, return new BasicConstraintManager(statemgr, subengine); } + const GRState* BasicConstraintManager::AssumeSymNE(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) { - // First, determine if sym == X, where X != V. + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // First, determine if sym == X, where X+Adjustment != V. + llvm::APSInt Adjusted = V-Adjustment; if (const llvm::APSInt* X = getSymVal(state, sym)) { - bool isFeasible = (*X != V); + bool isFeasible = (*X != Adjusted); return isFeasible ? state : NULL; } - // Second, determine if sym != V. - if (isNotEqual(state, sym, V)) + // Second, determine if sym+Adjustment != V. + if (isNotEqual(state, sym, Adjusted)) return state; // If we reach here, sym is not a constant and we don't know if it is != V. // Make that assumption. - return AddNE(state, sym, V); + return AddNE(state, sym, Adjusted); } -const GRState *BasicConstraintManager::AssumeSymEQ(const GRState *state, - SymbolRef sym, - const llvm::APSInt &V) { - // First, determine if sym == X, where X != V. +const GRState* +BasicConstraintManager::AssumeSymEQ(const GRState *state, SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // First, determine if sym == X, where X+Adjustment != V. + llvm::APSInt Adjusted = V-Adjustment; if (const llvm::APSInt* X = getSymVal(state, sym)) { - bool isFeasible = *X == V; + bool isFeasible = (*X == Adjusted); return isFeasible ? state : NULL; } - // Second, determine if sym != V. - if (isNotEqual(state, sym, V)) + // Second, determine if sym+Adjustment != V. + if (isNotEqual(state, sym, Adjusted)) return NULL; // If we reach here, sym is not a constant and we don't know if it is == V. // Make that assumption. - return AddEQ(state, sym, V); + return AddEQ(state, sym, Adjusted); } -// These logic will be handled in another ConstraintManager. -const GRState *BasicConstraintManager::AssumeSymLT(const GRState *state, - SymbolRef sym, - const llvm::APSInt& V) { +// The logic for these will be handled in another ConstraintManager. +const GRState* +BasicConstraintManager::AssumeSymLT(const GRState *state, SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { // Is 'V' the smallest possible value? if (V == llvm::APSInt::getMinValue(V.getBitWidth(), V.isUnsigned())) { // sym cannot be any value less than 'V'. This path is infeasible. @@ -141,13 +153,13 @@ const GRState *BasicConstraintManager::AssumeSymLT(const GRState *state, } // FIXME: For now have assuming x < y be the same as assuming sym != V; - return AssumeSymNE(state, sym, V); + return AssumeSymNE(state, sym, V, Adjustment); } -const GRState *BasicConstraintManager::AssumeSymGT(const GRState *state, - SymbolRef sym, - const llvm::APSInt& V) { - +const GRState* +BasicConstraintManager::AssumeSymGT(const GRState *state, SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { // Is 'V' the largest possible value? if (V == llvm::APSInt::getMaxValue(V.getBitWidth(), V.isUnsigned())) { // sym cannot be any value greater than 'V'. This path is infeasible. @@ -155,56 +167,60 @@ const GRState *BasicConstraintManager::AssumeSymGT(const GRState *state, } // FIXME: For now have assuming x > y be the same as assuming sym != V; - return AssumeSymNE(state, sym, V); + return AssumeSymNE(state, sym, V, Adjustment); } -const GRState *BasicConstraintManager::AssumeSymGE(const GRState *state, - SymbolRef sym, - const llvm::APSInt &V) { - - // Reject a path if the value of sym is a constant X and !(X >= V). +const GRState* +BasicConstraintManager::AssumeSymGE(const GRState *state, SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Reject a path if the value of sym is a constant X and !(X+Adj >= V). if (const llvm::APSInt *X = getSymVal(state, sym)) { - bool isFeasible = *X >= V; + bool isFeasible = (*X >= V-Adjustment); return isFeasible ? state : NULL; } // Sym is not a constant, but it is worth looking to see if V is the // maximum integer value. if (V == llvm::APSInt::getMaxValue(V.getBitWidth(), V.isUnsigned())) { - // If we know that sym != V, then this condition is infeasible since - // there is no other value greater than V. - bool isFeasible = !isNotEqual(state, sym, V); + llvm::APSInt Adjusted = V-Adjustment; + + // If we know that sym != V (after adjustment), then this condition + // is infeasible since there is no other value greater than V. + bool isFeasible = !isNotEqual(state, sym, Adjusted); // If the path is still feasible then as a consequence we know that - // 'sym == V' because we cannot have 'sym > V' (no larger values). + // 'sym+Adjustment == V' because there are no larger values. // Add this constraint. - return isFeasible ? AddEQ(state, sym, V) : NULL; + return isFeasible ? AddEQ(state, sym, Adjusted) : NULL; } return state; } const GRState* -BasicConstraintManager::AssumeSymLE(const GRState* state, SymbolRef sym, - const llvm::APSInt& V) { - - // Reject a path if the value of sym is a constant X and !(X <= V). +BasicConstraintManager::AssumeSymLE(const GRState *state, SymbolRef sym, + const llvm::APSInt &V, + const llvm::APSInt &Adjustment) { + // Reject a path if the value of sym is a constant X and !(X+Adj <= V). if (const llvm::APSInt* X = getSymVal(state, sym)) { - bool isFeasible = *X <= V; + bool isFeasible = (*X <= V-Adjustment); return isFeasible ? state : NULL; } // Sym is not a constant, but it is worth looking to see if V is the // minimum integer value. if (V == llvm::APSInt::getMinValue(V.getBitWidth(), V.isUnsigned())) { - // If we know that sym != V, then this condition is infeasible since - // there is no other value less than V. - bool isFeasible = !isNotEqual(state, sym, V); + llvm::APSInt Adjusted = V-Adjustment; + + // If we know that sym != V (after adjustment), then this condition + // is infeasible since there is no other value less than V. + bool isFeasible = !isNotEqual(state, sym, Adjusted); // If the path is still feasible then as a consequence we know that - // 'sym == V' because we cannot have 'sym < V' (no smaller values). + // 'sym+Adjustment == V' because there are no smaller values. // Add this constraint. - return isFeasible ? AddEQ(state, sym, V) : NULL; + return isFeasible ? AddEQ(state, sym, Adjusted) : NULL; } return state; @@ -213,7 +229,7 @@ BasicConstraintManager::AssumeSymLE(const GRState* state, SymbolRef sym, const GRState* BasicConstraintManager::AddEQ(const GRState* state, SymbolRef sym, const llvm::APSInt& V) { // Create a new state with the old binding replaced. - return state->set(sym, &V); + return state->set(sym, &state->getBasicVals().getValue(V)); } const GRState* BasicConstraintManager::AddNE(const GRState* state, SymbolRef sym, @@ -224,7 +240,7 @@ const GRState* BasicConstraintManager::AddNE(const GRState* state, SymbolRef sym GRState::IntSetTy S = T ? *T : ISetFactory.GetEmptySet(); // Now add V to the NE set. - S = ISetFactory.Add(S, &V); + S = ISetFactory.Add(S, &state->getBasicVals().getValue(V)); // Create a new state with the old binding replaced. return state->set(sym, S); @@ -243,7 +259,7 @@ bool BasicConstraintManager::isNotEqual(const GRState* state, SymbolRef sym, const ConstNotEqTy::data_type* T = state->get(sym); // See if V is present in the NE-set. - return T ? T->contains(&V) : false; + return T ? T->contains(&state->getBasicVals().getValue(V)) : false; } bool BasicConstraintManager::isEqual(const GRState* state, SymbolRef sym, diff --git a/lib/Checker/RangeConstraintManager.cpp b/lib/Checker/RangeConstraintManager.cpp index c904c33e08..2a35d326a9 100644 --- a/lib/Checker/RangeConstraintManager.cpp +++ b/lib/Checker/RangeConstraintManager.cpp @@ -105,97 +105,69 @@ public: return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0; } - /// AddEQ - Create a new RangeSet with the additional constraint that the - /// value be equal to V. - RangeSet AddEQ(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { - // Search for a range that includes 'V'. If so, return a new RangeSet - // representing { [V, V] }. - for (PrimRangeSet::iterator i = begin(), e = end(); i!=e; ++i) - if (i->Includes(V)) - return RangeSet(F, V, V); - - return RangeSet(F); - } - - /// AddNE - Create a new RangeSet with the additional constraint that the - /// value be not be equal to V. - RangeSet AddNE(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { - PrimRangeSet newRanges = ranges; - - // FIXME: We can perhaps enhance ImmutableSet to do this search for us - // in log(N) time using the sorted property of the internal AVL tree. - for (iterator i = begin(), e = end(); i != e; ++i) { - if (i->Includes(V)) { - // Remove the old range. - newRanges = F.Remove(newRanges, *i); - // Split the old range into possibly one or two ranges. - if (V != i->From()) - newRanges = F.Add(newRanges, Range(i->From(), BV.Sub1(V))); - if (V != i->To()) - newRanges = F.Add(newRanges, Range(BV.Add1(V), i->To())); - // All of the ranges are non-overlapping, so we can stop. +private: + void IntersectInRange(BasicValueFactory &BV, Factory &F, + const llvm::APSInt &Lower, + const llvm::APSInt &Upper, + PrimRangeSet &newRanges, + PrimRangeSet::iterator &i, + PrimRangeSet::iterator &e) const { + // There are six cases for each range R in the set: + // 1. R is entirely before the intersection range. + // 2. R is entirely after the intersection range. + // 3. R contains the entire intersection range. + // 4. R starts before the intersection range and ends in the middle. + // 5. R starts in the middle of the intersection range and ends after it. + // 6. R is entirely contained in the intersection range. + // These correspond to each of the conditions below. + for (/* i = begin(), e = end() */; i != e; ++i) { + if (i->To() < Lower) { + continue; + } + if (i->From() > Upper) { break; } - } - - return newRanges; - } - - /// AddNE - Create a new RangeSet with the additional constraint that the - /// value be less than V. - RangeSet AddLT(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { - PrimRangeSet newRanges = F.GetEmptySet(); - - for (iterator i = begin(), e = end() ; i != e ; ++i) { - if (i->Includes(V) && i->From() < V) - newRanges = F.Add(newRanges, Range(i->From(), BV.Sub1(V))); - else if (i->To() < V) - newRanges = F.Add(newRanges, *i); - } - - return newRanges; - } - - RangeSet AddLE(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { - PrimRangeSet newRanges = F.GetEmptySet(); - for (iterator i = begin(), e = end(); i != e; ++i) { - // Strictly we should test for includes *V + 1, but no harm is - // done by this formulation - if (i->Includes(V)) - newRanges = F.Add(newRanges, Range(i->From(), V)); - else if (i->To() <= V) - newRanges = F.Add(newRanges, *i); - } - - return newRanges; - } - - RangeSet AddGT(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { - PrimRangeSet newRanges = F.GetEmptySet(); - - for (PrimRangeSet::iterator i = begin(), e = end(); i != e; ++i) { - if (i->Includes(V) && i->To() > V) - newRanges = F.Add(newRanges, Range(BV.Add1(V), i->To())); - else if (i->From() > V) - newRanges = F.Add(newRanges, *i); + if (i->Includes(Lower)) { + if (i->Includes(Upper)) { + newRanges = F.Add(newRanges, Range(BV.getValue(Lower), + BV.getValue(Upper))); + break; + } else + newRanges = F.Add(newRanges, Range(BV.getValue(Lower), i->To())); + } else { + if (i->Includes(Upper)) { + newRanges = F.Add(newRanges, Range(i->From(), BV.getValue(Upper))); + break; + } else + newRanges = F.Add(newRanges, *i); + } } - - return newRanges; } - RangeSet AddGE(BasicValueFactory &BV, Factory &F, const llvm::APSInt &V) { +public: + // Returns a set containing the values in the receiving set, intersected with + // the closed range [Lower, Upper]. Unlike the Range type, this range uses + // modular arithmetic, corresponding to the common treatment of C integer + // overflow. Thus, if the Lower bound is greater than the Upper bound, the + // range is taken to wrap around. This is equivalent to taking the + // intersection with the two ranges [Min, Upper] and [Lower, Max], + // or, alternatively, /removing/ all integers between Upper and Lower. + RangeSet Intersect(BasicValueFactory &BV, Factory &F, + const llvm::APSInt &Lower, + const llvm::APSInt &Upper) const { PrimRangeSet newRanges = F.GetEmptySet(); - for (PrimRangeSet::iterator i = begin(), e = end(); i != e; ++i) { - // Strictly we should test for includes *V - 1, but no harm is - // done by this formulation - if (i->Includes(V)) - newRanges = F.Add(newRanges, Range(V, i->To())); - else if (i->From() >= V) - newRanges = F.Add(newRanges, *i); + PrimRangeSet::iterator i = begin(), e = end(); + if (Lower <= Upper) + IntersectInRange(BV, F, Lower, Upper, newRanges, i, e); + else { + // The order of the next two statements is important! + // IntersectInRange() does not reset the iteration state for i and e. + // Therefore, the lower range most be handled first. + IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e); + IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e); } - return newRanges; } @@ -237,23 +209,29 @@ public: RangeConstraintManager(GRSubEngine &subengine) : SimpleConstraintManager(subengine) {} - const GRState* AssumeSymNE(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymNE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); - const GRState* AssumeSymEQ(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymEQ(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); - const GRState* AssumeSymLT(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymLT(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); - const GRState* AssumeSymGT(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymGT(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); - const GRState* AssumeSymGE(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymGE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); - const GRState* AssumeSymLE(const GRState* St, SymbolRef sym, - const llvm::APSInt& V); + const GRState* AssumeSymLE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment); const llvm::APSInt* getSymVal(const GRState* St, SymbolRef sym) const; @@ -303,10 +281,6 @@ RangeConstraintManager::RemoveDeadBindings(const GRState* state, return state->set(CR); } -//===------------------------------------------------------------------------=== -// AssumeSymX methods: public interface for RangeConstraintManager. -//===------------------------------------------------------------------------===/ - RangeSet RangeConstraintManager::GetRange(const GRState *state, SymbolRef sym) { if (ConstraintRangeTy::data_type* V = state->get(sym)) @@ -323,20 +297,127 @@ RangeConstraintManager::GetRange(const GRState *state, SymbolRef sym) { // AssumeSymX methods: public interface for RangeConstraintManager. //===------------------------------------------------------------------------===/ -#define AssumeX(OP)\ -const GRState*\ -RangeConstraintManager::AssumeSym ## OP(const GRState* state, SymbolRef sym,\ - const llvm::APSInt& V){\ - const RangeSet& R = GetRange(state, sym).Add##OP(state->getBasicVals(), F, V);\ - return !R.isEmpty() ? state->set(sym, R) : NULL;\ +// The syntax for ranges below is mathematical, using [x, y] for closed ranges +// and (x, y) for open ranges. These ranges are modular, corresponding with +// a common treatment of C integer overflow. This means that these methods +// do not have to worry about overflow; RangeSet::Intersect can handle such a +// "wraparound" range. +// As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, +// UINT_MAX, 0, 1, and 2. + +const GRState* +RangeConstraintManager::AssumeSymNE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Lower; + --Lower; + ++Upper; + + // [Int-Adjustment+1, Int-Adjustment-1] + // Notice that the lower bound is greater than the upper bound. + RangeSet New = GetRange(state, sym).Intersect(BV, F, Upper, Lower); + return New.isEmpty() ? NULL : state->set(sym, New); } -AssumeX(EQ) -AssumeX(NE) -AssumeX(LT) -AssumeX(GT) -AssumeX(LE) -AssumeX(GE) +const GRState* +RangeConstraintManager::AssumeSymEQ(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + // [Int-Adjustment, Int-Adjustment] + BasicValueFactory &BV = state->getBasicVals(); + llvm::APSInt AdjInt = Int-Adjustment; + RangeSet New = GetRange(state, sym).Intersect(BV, F, AdjInt, AdjInt); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +const GRState* +RangeConstraintManager::AssumeSymLT(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Min = BV.getMinValue(T); + + // Special case for Int == Min. This is always false. + if (Int == Min) + return NULL; + + llvm::APSInt Lower = Min-Adjustment; + llvm::APSInt Upper = Int-Adjustment; + --Upper; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +const GRState* +RangeConstraintManager::AssumeSymGT(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Max = BV.getMaxValue(T); + + // Special case for Int == Max. This is always false. + if (Int == Max) + return NULL; + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Max-Adjustment; + ++Lower; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +const GRState* +RangeConstraintManager::AssumeSymGE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Min = BV.getMinValue(T); + + // Special case for Int == Min. This is always feasible. + if (Int == Min) + return state; + + const llvm::APSInt &Max = BV.getMaxValue(T); + + llvm::APSInt Lower = Int-Adjustment; + llvm::APSInt Upper = Max-Adjustment; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} + +const GRState* +RangeConstraintManager::AssumeSymLE(const GRState* state, SymbolRef sym, + const llvm::APSInt& Int, + const llvm::APSInt& Adjustment) { + BasicValueFactory &BV = state->getBasicVals(); + + QualType T = state->getSymbolManager().getType(sym); + const llvm::APSInt &Max = BV.getMaxValue(T); + + // Special case for Int == Max. This is always feasible. + if (Int == Max) + return state; + + const llvm::APSInt &Min = BV.getMinValue(T); + + llvm::APSInt Lower = Min-Adjustment; + llvm::APSInt Upper = Int-Adjustment; + + RangeSet New = GetRange(state, sym).Intersect(BV, F, Lower, Upper); + return New.isEmpty() ? NULL : state->set(sym, New); +} //===------------------------------------------------------------------------=== // Pretty-printing. diff --git a/lib/Checker/SimpleConstraintManager.cpp b/lib/Checker/SimpleConstraintManager.cpp index 8c423a9977..3d6930b355 100644 --- a/lib/Checker/SimpleConstraintManager.cpp +++ b/lib/Checker/SimpleConstraintManager.cpp @@ -35,12 +35,11 @@ bool SimpleConstraintManager::canReasonAbout(SVal X) const { case BinaryOperator::Or: case BinaryOperator::Xor: return false; - // We don't reason yet about arithmetic constraints on symbolic values. + // We don't reason yet about these arithmetic constraints on + // symbolic values. case BinaryOperator::Mul: case BinaryOperator::Div: case BinaryOperator::Rem: - case BinaryOperator::Add: - case BinaryOperator::Sub: case BinaryOperator::Shl: case BinaryOperator::Shr: return false; @@ -90,12 +89,11 @@ const GRState *SimpleConstraintManager::AssumeAux(const GRState *state, while (SubR) { // FIXME: now we only find the first symbolic region. if (const SymbolicRegion *SymR = dyn_cast(SubR)) { + const llvm::APSInt &zero = BasicVals.getZeroWithPtrWidth(); if (Assumption) - return AssumeSymNE(state, SymR->getSymbol(), - BasicVals.getZeroWithPtrWidth()); + return AssumeSymNE(state, SymR->getSymbol(), zero, zero); else - return AssumeSymEQ(state, SymR->getSymbol(), - BasicVals.getZeroWithPtrWidth()); + return AssumeSymEQ(state, SymR->getSymbol(), zero, zero); } SubR = dyn_cast(SubR->getSuperRegion()); } @@ -121,11 +119,27 @@ const GRState *SimpleConstraintManager::Assume(const GRState *state, return SU.ProcessAssume(state, cond, assumption); } +static BinaryOperator::Opcode NegateComparison(BinaryOperator::Opcode op) { + // FIXME: This should probably be part of BinaryOperator, since this isn't + // the only place it's used. (This code was copied from SimpleSValuator.cpp.) + switch (op) { + default: + assert(false && "Invalid opcode."); + case BinaryOperator::LT: return BinaryOperator::GE; + case BinaryOperator::GT: return BinaryOperator::LE; + case BinaryOperator::LE: return BinaryOperator::GT; + case BinaryOperator::GE: return BinaryOperator::LT; + case BinaryOperator::EQ: return BinaryOperator::NE; + case BinaryOperator::NE: return BinaryOperator::EQ; + } +} + const GRState *SimpleConstraintManager::AssumeAux(const GRState *state, NonLoc Cond, bool Assumption) { - // We cannot reason about SymIntExpr and SymSymExpr. + // We cannot reason about SymSymExprs, + // and can only reason about some SymIntExprs. if (!canReasonAbout(Cond)) { // Just return the current state indicating that the path is feasible. // This may be an over-approximation of what is possible. @@ -144,30 +158,42 @@ const GRState *SimpleConstraintManager::AssumeAux(const GRState *state, SymbolRef sym = SV.getSymbol(); QualType T = SymMgr.getType(sym); const llvm::APSInt &zero = BasicVals.getValue(0, T); - - return Assumption ? AssumeSymNE(state, sym, zero) - : AssumeSymEQ(state, sym, zero); + if (Assumption) + return AssumeSymNE(state, sym, zero, zero); + else + return AssumeSymEQ(state, sym, zero, zero); } case nonloc::SymExprValKind: { nonloc::SymExprVal V = cast(Cond); - if (const SymIntExpr *SE = dyn_cast(V.getSymbolicExpression())){ - // FIXME: This is a hack. It silently converts the RHS integer to be - // of the same type as on the left side. This should be removed once - // we support truncation/extension of symbolic values. - GRStateManager &StateMgr = state->getStateManager(); - ASTContext &Ctx = StateMgr.getContext(); - QualType LHSType = SE->getLHS()->getType(Ctx); - BasicValueFactory &BasicVals = StateMgr.getBasicVals(); - const llvm::APSInt &RHS = BasicVals.Convert(LHSType, SE->getRHS()); - SymIntExpr SENew(SE->getLHS(), SE->getOpcode(), RHS, SE->getType(Ctx)); - - return AssumeSymInt(state, Assumption, &SENew); - } - // For all other symbolic expressions, over-approximate and consider - // the constraint feasible. - return state; + // For now, we only handle expressions whose RHS is an integer. + // All other expressions are assumed to be feasible. + const SymIntExpr *SE = dyn_cast(V.getSymbolicExpression()); + if (!SE) + return state; + + GRStateManager &StateMgr = state->getStateManager(); + ASTContext &Ctx = StateMgr.getContext(); + BasicValueFactory &BasicVals = StateMgr.getBasicVals(); + + // FIXME: This is a hack. It silently converts the RHS integer to be + // of the same type as on the left side. This should be removed once + // we support truncation/extension of symbolic values. + const SymExpr *LHS = SE->getLHS(); + QualType LHSType = LHS->getType(Ctx); + const llvm::APSInt &RHS = BasicVals.Convert(LHSType, SE->getRHS()); + + BinaryOperator::Opcode op = SE->getOpcode(); + // FIXME: We should implicitly compare non-comparison expressions to 0. + if (!BinaryOperator::isComparisonOp(op)) + return state; + + // From here on out, op is the real comparison we'll be testing. + if (!Assumption) + op = NegateComparison(op); + + return AssumeSymRel(state, LHS, op, RHS); } case nonloc::ConcreteIntKind: { @@ -182,43 +208,76 @@ const GRState *SimpleConstraintManager::AssumeAux(const GRState *state, } // end switch } -const GRState *SimpleConstraintManager::AssumeSymInt(const GRState *state, - bool Assumption, - const SymIntExpr *SE) { - - - // Here we assume that LHS is a symbol. This is consistent with the - // rest of the constraint manager logic. - SymbolRef Sym = cast(SE->getLHS()); - const llvm::APSInt &Int = SE->getRHS(); +const GRState *SimpleConstraintManager::AssumeSymRel(const GRState *state, + const SymExpr *LHS, + BinaryOperator::Opcode op, + const llvm::APSInt& Int) { + assert(BinaryOperator::isComparisonOp(op) && + "Non-comparison ops should be rewritten as comparisons to zero."); + + // We only handle simple comparisons of the form "$sym == constant" + // or "($sym+constant1) == constant2". + // The adjustment is "constant1" in the above expression. It's used to + // "slide" the solution range around for modular arithmetic. For example, + // x < 4 has the solution [0, 3]. x+2 < 4 has the solution [0-2, 3-2], which + // in modular arithmetic is [0, 1] U [UINT_MAX-1, UINT_MAX]. It's up to + // the subclasses of SimpleConstraintManager to handle the adjustment. + llvm::APSInt Adjustment(Int.getBitWidth(), Int.isUnsigned()); + + // First check if the LHS is a simple symbol reference. + SymbolRef Sym = dyn_cast(LHS); + if (!Sym) { + // Next, see if it's a "($sym+constant1)" expression. + const SymIntExpr *SE = dyn_cast(LHS); + + // We don't handle "($sym1+$sym2)". + // Give up and assume the constraint is feasible. + if (!SE) + return state; + + // We don't handle "(+constant1)". + // Give up and assume the constraint is feasible. + Sym = dyn_cast(SE->getLHS()); + if (!Sym) + return state; + + // Get the constant out of the expression "($sym+constant1)". + switch (SE->getOpcode()) { + case BinaryOperator::Add: + Adjustment = SE->getRHS(); + break; + case BinaryOperator::Sub: + Adjustment = -SE->getRHS(); + break; + default: + // We don't handle non-additive operators. + // Give up and assume the constraint is feasible. + return state; + } + } - switch (SE->getOpcode()) { + switch (op) { default: // No logic yet for other operators. Assume the constraint is feasible. return state; case BinaryOperator::EQ: - return Assumption ? AssumeSymEQ(state, Sym, Int) - : AssumeSymNE(state, Sym, Int); + return AssumeSymEQ(state, Sym, Int, Adjustment); case BinaryOperator::NE: - return Assumption ? AssumeSymNE(state, Sym, Int) - : AssumeSymEQ(state, Sym, Int); + return AssumeSymNE(state, Sym, Int, Adjustment); + case BinaryOperator::GT: - return Assumption ? AssumeSymGT(state, Sym, Int) - : AssumeSymLE(state, Sym, Int); + return AssumeSymGT(state, Sym, Int, Adjustment); case BinaryOperator::GE: - return Assumption ? AssumeSymGE(state, Sym, Int) - : AssumeSymLT(state, Sym, Int); + return AssumeSymGE(state, Sym, Int, Adjustment); case BinaryOperator::LT: - return Assumption ? AssumeSymLT(state, Sym, Int) - : AssumeSymGE(state, Sym, Int); + return AssumeSymLT(state, Sym, Int, Adjustment); case BinaryOperator::LE: - return Assumption ? AssumeSymLE(state, Sym, Int) - : AssumeSymGT(state, Sym, Int); + return AssumeSymLE(state, Sym, Int, Adjustment); } // end switch } diff --git a/lib/Checker/SimpleConstraintManager.h b/lib/Checker/SimpleConstraintManager.h index 5f20e0072b..45057e64f3 100644 --- a/lib/Checker/SimpleConstraintManager.h +++ b/lib/Checker/SimpleConstraintManager.h @@ -38,8 +38,10 @@ public: const GRState *Assume(const GRState *state, NonLoc Cond, bool Assumption); - const GRState *AssumeSymInt(const GRState *state, bool Assumption, - const SymIntExpr *SE); + const GRState *AssumeSymRel(const GRState *state, + const SymExpr *LHS, + BinaryOperator::Opcode op, + const llvm::APSInt& Int); const GRState *AssumeInBound(const GRState *state, DefinedSVal Idx, DefinedSVal UpperBound, @@ -51,23 +53,31 @@ protected: // Interface that subclasses must implement. //===------------------------------------------------------------------===// + // Each of these is of the form "$sym+Adj <> V", where "<>" is the comparison + // operation for the method being invoked. virtual const GRState *AssumeSymNE(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; virtual const GRState *AssumeSymEQ(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; virtual const GRState *AssumeSymLT(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; virtual const GRState *AssumeSymGT(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; virtual const GRState *AssumeSymLE(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; virtual const GRState *AssumeSymGE(const GRState *state, SymbolRef sym, - const llvm::APSInt& V) = 0; + const llvm::APSInt& V, + const llvm::APSInt& Adjustment) = 0; //===------------------------------------------------------------------===// // Internal implementation. diff --git a/lib/Checker/SimpleSValuator.cpp b/lib/Checker/SimpleSValuator.cpp index dd38a435a1..c85d03810a 100644 --- a/lib/Checker/SimpleSValuator.cpp +++ b/lib/Checker/SimpleSValuator.cpp @@ -261,63 +261,88 @@ SVal SimpleSValuator::EvalBinOpNN(const GRState *state, } } case nonloc::SymExprValKind: { - // Logical not? - if (!(op == BinaryOperator::EQ && rhs.isZeroConstant())) + nonloc::SymExprVal *selhs = cast(&lhs); + + // Only handle LHS of the form "$sym op constant", at least for now. + const SymIntExpr *symIntExpr = + dyn_cast(selhs->getSymbolicExpression()); + + if (!symIntExpr) return UnknownVal(); - const SymExpr *symExpr = - cast(lhs).getSymbolicExpression(); + // Is this a logical not? (!x is represented as x == 0.) + if (op == BinaryOperator::EQ && rhs.isZeroConstant()) { + // We know how to negate certain expressions. Simplify them here. - // Only handle ($sym op constant) for now. - if (const SymIntExpr *symIntExpr = dyn_cast(symExpr)) { BinaryOperator::Opcode opc = symIntExpr->getOpcode(); switch (opc) { - case BinaryOperator::LAnd: - case BinaryOperator::LOr: - assert(false && "Logical operators handled by branching logic."); - return UnknownVal(); - case BinaryOperator::Assign: - case BinaryOperator::MulAssign: - case BinaryOperator::DivAssign: - case BinaryOperator::RemAssign: - case BinaryOperator::AddAssign: - case BinaryOperator::SubAssign: - case BinaryOperator::ShlAssign: - case BinaryOperator::ShrAssign: - case BinaryOperator::AndAssign: - case BinaryOperator::XorAssign: - case BinaryOperator::OrAssign: - case BinaryOperator::Comma: - assert(false && "'=' and ',' operators handled by GRExprEngine."); - return UnknownVal(); - case BinaryOperator::PtrMemD: - case BinaryOperator::PtrMemI: - assert(false && "Pointer arithmetic not handled here."); - return UnknownVal(); - case BinaryOperator::Mul: - case BinaryOperator::Div: - case BinaryOperator::Rem: - case BinaryOperator::Add: - case BinaryOperator::Sub: - case BinaryOperator::Shl: - case BinaryOperator::Shr: - case BinaryOperator::And: - case BinaryOperator::Xor: - case BinaryOperator::Or: - // Not handled yet. - return UnknownVal(); - case BinaryOperator::LT: - case BinaryOperator::GT: - case BinaryOperator::LE: - case BinaryOperator::GE: - case BinaryOperator::EQ: - case BinaryOperator::NE: - opc = NegateComparison(opc); - assert(symIntExpr->getType(ValMgr.getContext()) == resultTy); - return ValMgr.makeNonLoc(symIntExpr->getLHS(), opc, - symIntExpr->getRHS(), resultTy); + default: + // We don't know how to negate this operation. + // Just handle it as if it were a normal comparison to 0. + break; + case BinaryOperator::LAnd: + case BinaryOperator::LOr: + assert(false && "Logical operators handled by branching logic."); + return UnknownVal(); + case BinaryOperator::Assign: + case BinaryOperator::MulAssign: + case BinaryOperator::DivAssign: + case BinaryOperator::RemAssign: + case BinaryOperator::AddAssign: + case BinaryOperator::SubAssign: + case BinaryOperator::ShlAssign: + case BinaryOperator::ShrAssign: + case BinaryOperator::AndAssign: + case BinaryOperator::XorAssign: + case BinaryOperator::OrAssign: + case BinaryOperator::Comma: + assert(false && "'=' and ',' operators handled by GRExprEngine."); + return UnknownVal(); + case BinaryOperator::PtrMemD: + case BinaryOperator::PtrMemI: + assert(false && "Pointer arithmetic not handled here."); + return UnknownVal(); + case BinaryOperator::LT: + case BinaryOperator::GT: + case BinaryOperator::LE: + case BinaryOperator::GE: + case BinaryOperator::EQ: + case BinaryOperator::NE: + // Negate the comparison and make a value. + opc = NegateComparison(opc); + assert(symIntExpr->getType(ValMgr.getContext()) == resultTy); + return ValMgr.makeNonLoc(symIntExpr->getLHS(), opc, + symIntExpr->getRHS(), resultTy); } } + + // For now, only handle expressions whose RHS is a constant. + const nonloc::ConcreteInt *rhsInt = dyn_cast(&rhs); + if (!rhsInt) + return UnknownVal(); + + // If both the LHS and the current expression are additive, + // fold their constants. + if (BinaryOperator::isAdditiveOp(op)) { + BinaryOperator::Opcode lop = symIntExpr->getOpcode(); + if (BinaryOperator::isAdditiveOp(lop)) { + BasicValueFactory &BVF = ValMgr.getBasicValueFactory(); + const llvm::APSInt *newRHS; + if (lop == op) + newRHS = BVF.EvaluateAPSInt(BinaryOperator::Add, + symIntExpr->getRHS(), + rhsInt->getValue()); + else + newRHS = BVF.EvaluateAPSInt(BinaryOperator::Sub, + symIntExpr->getRHS(), + rhsInt->getValue()); + return ValMgr.makeNonLoc(symIntExpr->getLHS(), lop, *newRHS, + resultTy); + } + } + + // Otherwise, make a SymExprVal out of the expression. + return ValMgr.makeNonLoc(symIntExpr, op, rhsInt->getValue(), resultTy); } case nonloc::ConcreteIntKind: { if (isa(rhs)) { diff --git a/test/Analysis/additive-folding-range-constraints.c b/test/Analysis/additive-folding-range-constraints.c new file mode 100644 index 0000000000..1202328e91 --- /dev/null +++ b/test/Analysis/additive-folding-range-constraints.c @@ -0,0 +1,99 @@ +// RUN: %clang_cc1 -analyze -analyzer-check-objc-mem -analyzer-experimental-checks -verify -analyzer-constraints=range %s +#include + +// These are used to trigger warnings. +typedef typeof(sizeof(int)) size_t; +void *malloc(size_t); +void free(void *); +#define NULL ((void*)0) + +// Each of these adjusted ranges has an adjustment small enough to split the +// solution range across an overflow boundary (Min for <, Max for >). +// This corresponds to one set of branches in RangeConstraintManager. +void smallAdjustmentGT (unsigned a) { + char* b = NULL; + if (a+2 > 1) + b = malloc(1); + if (a == UINT_MAX-1 || a == UINT_MAX) + return; // no-warning + else if (a < UINT_MAX-1) + free(b); + return; // no-warning +} + +void smallAdjustmentGE (unsigned a) { + char* b = NULL; + if (a+2 >= 1) + b = malloc(1); + if (a == UINT_MAX-1) + return; // no-warning + else if (a < UINT_MAX-1 || a == UINT_MAX) + free(b); + return; // no-warning +} + +void smallAdjustmentLT (unsigned a) { + char* b = NULL; + if (a+1 < 2) + b = malloc(1); + if (a == 0 || a == UINT_MAX) + free(b); + return; // no-warning +} + +void smallAdjustmentLE (unsigned a) { + char* b = NULL; + if (a+1 <= 2) + b = malloc(1); + if (a == 0 || a == 1 || a == UINT_MAX) + free(b); + return; // no-warning +} + + +// Each of these adjusted ranges has an adjustment large enough to push the +// comparison value over an overflow boundary (Min for <, Max for >). +// This corresponds to one set of branches in RangeConstraintManager. +void largeAdjustmentGT (unsigned a) { + char* b = NULL; + if (a-2 > UINT_MAX-1) + b = malloc(1); + if (a == 1 || a == 0) + free(b); + else if (a > 1) + free(b); + return; // no-warning +} + +void largeAdjustmentGE (unsigned a) { + char* b = NULL; + if (a-2 >= UINT_MAX-1) + b = malloc(1); + if (a > 1) + return; // no-warning + else if (a == 1 || a == 0) + free(b); + return; // no-warning +} + +void largeAdjustmentLT (unsigned a) { + char* b = NULL; + if (a+2 < 1) + b = malloc(1); + if (a == UINT_MAX-1 || a == UINT_MAX) + free(b); + else if (a < UINT_MAX-1) + return; // no-warning + return; // no-warning +} + +void largeAdjustmentLE (unsigned a) { + char* b = NULL; + if (a+2 <= 1) + b = malloc(1); + if (a < UINT_MAX-1) + return; // no-warning + else if (a == UINT_MAX-1 || a == UINT_MAX) + free(b); + return; // no-warning +} diff --git a/test/Analysis/additive-folding.c b/test/Analysis/additive-folding.c new file mode 100644 index 0000000000..14fd42a2bf --- /dev/null +++ b/test/Analysis/additive-folding.c @@ -0,0 +1,163 @@ +// RUN: %clang_cc1 -analyze -analyzer-check-objc-mem -analyzer-experimental-checks -verify -analyzer-constraints=basic %s +// RUN: %clang_cc1 -analyze -analyzer-check-objc-mem -analyzer-experimental-checks -verify -analyzer-constraints=range %s +#include + +// These are used to trigger warnings. +typedef typeof(sizeof(int)) size_t; +void *malloc(size_t); +void free(void *); +#define NULL ((void*)0) + +//--------------- +// Plus/minus +//--------------- + +void separateExpressions (int a) { + int b = a + 1; + --b; + + char* buf = malloc(1); + if (a != 0 && b == 0) + return; // no-warning + free(buf); +} + +void oneLongExpression (int a) { + // Expression canonicalization should still allow this to work, even though + // the first term is on the left. + int b = 15 + a + 15 - 10 - 20; + + char* buf = malloc(1); + if (a != 0 && b == 0) + return; // no-warning + free(buf); +} + +//--------------- +// Comparisons +//--------------- + +// Equality and inequality only +void eq_ne (unsigned a) { + char* b = NULL; + if (a == UINT_MAX) + b = malloc(1); + if (a+1 != 0) + return; // no-warning + if (a-1 != UINT_MAX-1) + return; // no-warning + free(b); +} + +void ne_eq (unsigned a) { + char* b = NULL; + if (a != UINT_MAX) + b = malloc(1); + if (a+1 == 0) + return; // no-warning + if (a-1 == UINT_MAX-1) + return; // no-warning + free(b); +} + + +// Simple order comparisons with no adjustment +void baselineGT (unsigned a) { + char* b = NULL; + if (a > 0) + b = malloc(1); + if (a == 0) + return; // no-warning + free(b); +} + +void baselineGE (unsigned a) { + char* b = NULL; + if (a >= UINT_MAX) + b = malloc(1); + if (a == UINT_MAX) + free(b); + return; // no-warning +} + +void baselineLT (unsigned a) { + char* b = NULL; + if (a < UINT_MAX) + b = malloc(1); + if (a == UINT_MAX) + return; // no-warning + free(b); +} + +void baselineLE (unsigned a) { + char* b = NULL; + if (a <= 0) + b = malloc(1); + if (a == 0) + free(b); + return; // no-warning +} + + +// Adjustment gives each of these an extra solution! +void adjustedGT (unsigned a) { + char* b = NULL; + if (a-1 > UINT_MAX-1) + b = malloc(1); + return; // expected-warning{{leak}} +} + +void adjustedGE (unsigned a) { + char* b = NULL; + if (a-1 >= UINT_MAX-1) + b = malloc(1); + if (a == UINT_MAX) + free(b); + return; // expected-warning{{leak}} +} + +void adjustedLT (unsigned a) { + char* b = NULL; + if (a+1 < 1) + b = malloc(1); + return; // expected-warning{{leak}} +} + +void adjustedLE (unsigned a) { + char* b = NULL; + if (a+1 <= 1) + b = malloc(1); + if (a == 0) + free(b); + return; // expected-warning{{leak}} +} + + +// Tautologies +void tautologyGT (unsigned a) { + char* b = malloc(1); + if (a > UINT_MAX) + return; // no-warning + free(b); +} + +void tautologyGE (unsigned a) { + char* b = malloc(1); + if (a >= 0) + free(b); + return; // no-warning +} + +void tautologyLT (unsigned a) { + char* b = malloc(1); + if (a < 0) + return; // no-warning + free(b); +} + +void tautologyLE (unsigned a) { + char* b = malloc(1); + if (a <= UINT_MAX) + free(b); + return; // no-warning +} -- 2.40.0