From: Kaelyn Uhrain Date: Fri, 5 Aug 2011 23:18:04 +0000 (+0000) Subject: Perform array bounds checking in more situations and properly handle special X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d6c8865e6f306556b7eef972ceec2cc8ba026a0d;p=clang Perform array bounds checking in more situations and properly handle special case situations with the unary operators & and *. Also extend the array bounds checking to work with pointer arithmetic; the pointer arithemtic checking can be turned on using -Warray-bounds-pointer-arithmetic. The changes to where CheckArrayAccess gets called is based on some trial & error and a bunch of digging through source code and gdb backtraces in order to have the check performed under as many situations as possible (such as for variable initializers, arguments to function calls, and within conditional in addition to the simpler cases of the operands to binary and unary operator) while not being called--and triggering warnings--more than once for a given ArraySubscriptExpr. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@136997 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 1bb6c7125f..c277dee7a8 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -4085,13 +4085,19 @@ def err_out_of_line_default_deletes : Error< def err_defaulted_move_unsupported : Error< "defaulting move functions not yet supported">; +def warn_ptr_arith_precedes_bounds : Warning< + "the pointer decremented by %0 refers before the beginning of the array">, + InGroup>, DefaultIgnore; +def warn_ptr_arith_exceeds_bounds : Warning< + "the pointer incremented by %0 refers past the end of the array (that " + "contains %1 element%s2)">, + InGroup>, DefaultIgnore; def warn_array_index_precedes_bounds : Warning< "array index of '%0' indexes before the beginning of the array">, InGroup>; def warn_array_index_exceeds_bounds : Warning< "array index of '%0' indexes past the end of an array (that contains %1 " - "%select{element|elements}2)">, - InGroup>; + "element%s2)">, InGroup>; def note_array_index_out_of_bounds : Note< "array %0 declared here">; diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 5363b50368..a073cb9f07 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -5948,6 +5948,8 @@ public: unsigned ByteNo) const; private: + void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, + bool isSubscript=false, bool AllowOnePastEnd=true); void CheckArrayAccess(const Expr *E); bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall); diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index c54d250b04..8dfcfe7c11 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -3370,6 +3370,11 @@ void Sema::CheckImplicitConversions(Expr *E, SourceLocation CC) { if (E->isTypeDependent() || E->isValueDependent()) return; + // Check for array bounds violations in cases where the check isn't triggered + // elsewhere for other Expr types (like BinaryOperators), e.g. when an + // ArraySubscriptExpr is on the RHS of a variable initialization. + CheckArrayAccess(E); + // This is not the right CC for (e.g.) a variable initialization. AnalyzeImplicitConversions(*this, E, CC); } @@ -3474,6 +3479,15 @@ void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) { << TRange << Op->getSourceRange(); } +static const Type* getElementType(const Expr *BaseExpr) { + const Type* EltType = BaseExpr->getType().getTypePtr(); + if (EltType->isAnyPointerType()) + return EltType->getPointeeType().getTypePtr(); + else if (EltType->isArrayType()) + return EltType->getBaseElementTypeUnsafe(); + return EltType; +} + /// \brief Check whether this array fits the idiom of a size-one tail padded /// array member of a struct. /// @@ -3510,19 +3524,21 @@ static bool IsTailPaddedMemberArray(Sema &S, llvm::APInt Size, return false; } -static void CheckArrayAccess_Check(Sema &S, - const clang::ArraySubscriptExpr *E) { - const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts(); +void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, + bool isSubscript, bool AllowOnePastEnd) { + const Type* EffectiveType = getElementType(BaseExpr); + BaseExpr = BaseExpr->IgnoreParenCasts(); + IndexExpr = IndexExpr->IgnoreParenCasts(); + const ConstantArrayType *ArrayTy = - S.Context.getAsConstantArrayType(BaseExpr->getType()); + Context.getAsConstantArrayType(BaseExpr->getType()); if (!ArrayTy) return; - const Expr *IndexExpr = E->getIdx(); if (IndexExpr->isValueDependent()) return; llvm::APSInt index; - if (!IndexExpr->isIntegerConstantExpr(index, S.Context)) + if (!IndexExpr->isIntegerConstantExpr(index, Context)) return; const NamedDecl *ND = NULL; @@ -3535,47 +3551,94 @@ static void CheckArrayAccess_Check(Sema &S, llvm::APInt size = ArrayTy->getSize(); if (!size.isStrictlyPositive()) return; + + const Type* BaseType = getElementType(BaseExpr); + if (!isSubscript && BaseType != EffectiveType) { + // Make sure we're comparing apples to apples when comparing index to size + uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType); + uint64_t array_typesize = Context.getTypeSize(BaseType); + if (ptrarith_typesize != array_typesize) { + // There's a cast to a different size type involved + uint64_t ratio = array_typesize / ptrarith_typesize; + // TODO: Be smarter about handling cases where array_typesize is not a + // multiple of ptrarith_typesize + if (ptrarith_typesize * ratio == array_typesize) + size *= llvm::APInt(size.getBitWidth(), ratio); + } + } + if (size.getBitWidth() > index.getBitWidth()) index = index.sext(size.getBitWidth()); else if (size.getBitWidth() < index.getBitWidth()) size = size.sext(index.getBitWidth()); - // Don't warn for valid indexes - if (index.slt(size)) + // For array subscripting the index must be less than size, but for pointer + // arithmetic also allow the index (offset) to be equal to size since + // computing the next address after the end of the array is legal and + // commonly done e.g. in C++ iterators and range-based for loops. + if (AllowOnePastEnd ? index.sle(size) : index.slt(size)) return; // Also don't warn for arrays of size 1 which are members of some // structure. These are often used to approximate flexible arrays in C89 // code. - if (IsTailPaddedMemberArray(S, size, ND)) + if (IsTailPaddedMemberArray(*this, size, ND)) return; - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, - S.PDiag(diag::warn_array_index_exceeds_bounds) - << index.toString(10, true) - << size.toString(10, true) - << (unsigned)size.ugt(1) - << IndexExpr->getSourceRange()); + unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; + if (isSubscript) + DiagID = diag::warn_array_index_exceeds_bounds; + + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, + PDiag(DiagID) << index.toString(10, true) + << size.toString(10, true) + << (unsigned)size.getLimitedValue(~0U) + << IndexExpr->getSourceRange()); } else { - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, - S.PDiag(diag::warn_array_index_precedes_bounds) - << index.toString(10, true) - << IndexExpr->getSourceRange()); + unsigned DiagID = diag::warn_array_index_precedes_bounds; + if (!isSubscript) { + DiagID = diag::warn_ptr_arith_precedes_bounds; + if (index.isNegative()) index = -index; + } + + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, + PDiag(DiagID) << index.toString(10, true) + << IndexExpr->getSourceRange()); } if (ND) - S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, - S.PDiag(diag::note_array_index_out_of_bounds) - << ND->getDeclName()); + DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, + PDiag(diag::note_array_index_out_of_bounds) + << ND->getDeclName()); } void Sema::CheckArrayAccess(const Expr *expr) { - while (true) { - expr = expr->IgnoreParens(); + int AllowOnePastEnd = 0; + while (expr) { + expr = expr->IgnoreParenImpCasts(); switch (expr->getStmtClass()) { - case Stmt::ArraySubscriptExprClass: - CheckArrayAccess_Check(*this, cast(expr)); + case Stmt::ArraySubscriptExprClass: { + const ArraySubscriptExpr *ASE = cast(expr); + CheckArrayAccess(ASE->getBase(), ASE->getIdx(), true, + AllowOnePastEnd > 0); return; + } + case Stmt::UnaryOperatorClass: { + // Only unwrap the * and & unary operators + const UnaryOperator *UO = cast(expr); + expr = UO->getSubExpr(); + switch (UO->getOpcode()) { + case UO_AddrOf: + AllowOnePastEnd++; + break; + case UO_Deref: + AllowOnePastEnd--; + break; + default: + return; + } + break; + } case Stmt::ConditionalOperatorClass: { const ConditionalOperator *cond = cast(expr); if (const Expr *lhs = cond->getLHS()) diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index bc7d0bdfe5..5e092b7c06 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -322,6 +322,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // A glvalue of a non-function, non-array type T can be // converted to a prvalue. if (!E->isGLValue()) return Owned(E); + QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); @@ -364,8 +365,6 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // type of the lvalue. if (T.hasQualifiers()) T = T.getUnqualifiedType(); - - CheckArrayAccess(E); return Owned(ImplicitCastExpr::Create(Context, T, CK_LValueToRValue, E, 0, VK_RValue)); @@ -3397,6 +3396,12 @@ bool Sema::GatherArgumentsForCall(SourceLocation CallLoc, Arg = ArgExpr.takeAs(); } + + // Check for array bounds violations for each argument to the call. This + // check only triggers warnings when the argument isn't a more complex Expr + // with its own checking, such as a BinaryOperator. + CheckArrayAccess(Arg); + AllArgs.push_back(Arg); } @@ -5935,6 +5940,9 @@ QualType Sema::CheckAdditionOperands( // C99 6.5.6 return QualType(); } + // Check array bounds for pointer arithemtic + CheckArrayAccess(PExp, IExp); + if (CompLHSTy) { QualType LHSTy = Context.isPromotableBitField(lex.get()); if (LHSTy.isNull()) { @@ -5991,6 +5999,12 @@ QualType Sema::CheckSubtractionOperands(ExprResult &lex, ExprResult &rex, if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get())) return QualType(); + Expr *IExpr = rex.get()->IgnoreParenCasts(); + UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue, + OK_Ordinary, IExpr->getExprLoc()); + // Check array bounds for pointer arithemtic + CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex); + if (CompLHSTy) *CompLHSTy = lex.get()->getType(); return lex.get()->getType(); } @@ -6984,9 +6998,7 @@ QualType Sema::CheckAssignmentOperands(Expr *LHS, ExprResult &RHS, return QualType(); CheckForNullPointerDereference(*this, LHS); - // Check for trivial buffer overflows. - CheckArrayAccess(LHS->IgnoreParenCasts()); - + // C99 6.5.16p3: The type of an assignment expression is the type of the // left operand unless the left operand has qualified type, in which case // it is the unqualified version of the type of the left operand. @@ -7721,6 +7733,11 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, } if (ResultTy.isNull() || lhs.isInvalid() || rhs.isInvalid()) return ExprError(); + + // Check for array bounds violations for both sides of the BinaryOperator + CheckArrayAccess(lhs.get()); + CheckArrayAccess(rhs.get()); + if (CompResultTy.isNull()) return Owned(new (Context) BinaryOperator(lhs.take(), rhs.take(), Opc, ResultTy, VK, OK, OpLoc)); @@ -8071,6 +8088,13 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, if (resultType.isNull() || Input.isInvalid()) return ExprError(); + // Check for array bounds violations in the operand of the UnaryOperator, + // except for the '*' and '&' operators that have to be handled specially + // by CheckArrayAccess (as there are special cases like &array[arraysize] + // that are explicitly defined as valid by the standard). + if (Opc != UO_AddrOf && Opc != UO_Deref) + CheckArrayAccess(Input.get()); + return Owned(new (Context) UnaryOperator(Input.take(), Opc, resultType, VK, OK, OpLoc)); } diff --git a/lib/Sema/SemaExprCXX.cpp b/lib/Sema/SemaExprCXX.cpp index ef4944ec35..4f4946384d 100644 --- a/lib/Sema/SemaExprCXX.cpp +++ b/lib/Sema/SemaExprCXX.cpp @@ -2264,9 +2264,6 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, if (!From->isGLValue()) break; } - // Check for trivial buffer overflows. - CheckArrayAccess(From); - FromType = FromType.getUnqualifiedType(); From = ImplicitCastExpr::Create(Context, FromType, CK_LValueToRValue, From, 0, VK_RValue); diff --git a/test/SemaCXX/array-bounds.cpp b/test/SemaCXX/array-bounds.cpp index e8156320db..2bbb4776ad 100644 --- a/test/SemaCXX/array-bounds.cpp +++ b/test/SemaCXX/array-bounds.cpp @@ -37,11 +37,16 @@ void test() { s2.a[3] = 0; // no warning for 0-sized array union { - short a[2]; // expected-note {{declared here}} + short a[2]; // expected-note 4 {{declared here}} char c[4]; } u; u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}} u.c[3] = 1; // no warning + short *p = &u.a[2]; // no warning + p = &u.a[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}} + *(&u.a[2]) = 1; // expected-warning {{array index of '2' indexes past the end of an array (that contains 2 elements)}} + *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}} + *(&u.c[3]) = 1; // no warning const int const_subscript = 3; int array[2]; // expected-note {{declared here}} @@ -153,8 +158,7 @@ void test_switch() { enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E }; enum enumB { enumB_X, enumB_Y, enumB_Z }; static enum enumB myVal = enumB_X; -void test_nested_switch() -{ +void test_nested_switch() { switch (enumA_E) { // expected-warning {{no case matching constant}} switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and 'enumB_Z' not handled in switch}} case enumB_Y: ; @@ -199,3 +203,14 @@ namespace metaprogramming { B->c[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}} } } + +void bar(int x) {} +int test_more() { + int foo[5]; // expected-note 5 {{array 'foo' declared here}} + bar(foo[5]); // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}} + ++foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}} + if (foo[6]) // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}} + return --foo[6]; // expected-warning {{array index of '6' indexes past the end of an array (that contains 5 elements)}} + else + return foo[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 5 elements)}} +}