From b48f7c059e74cd5395ca542c1a96be16e42f3d80 Mon Sep 17 00:00:00 2001 From: Kaelyn Uhrain Date: Tue, 26 Jul 2011 01:52:28 +0000 Subject: [PATCH] Expand array bounds checking to work in the presence of unary & and *, and to work with pointer arithmetic in addition to array indexing. The new pointer arithmetic porition of the array bounds checking can be turned on by -Warray-bounds-pointer-arithmetic (and is off by default). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@136046 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticSemaKinds.td | 8 +- include/clang/Sema/Sema.h | 2 + lib/Sema/SemaChecking.cpp | 89 +++++++++++++++++----- lib/Sema/SemaExpr.cpp | 17 +++++ test/SemaCXX/array-bounds-ptr-arith.cpp | 33 ++++++++ test/SemaCXX/array-bounds.cpp | 13 ++-- 6 files changed, 134 insertions(+), 28 deletions(-) create mode 100644 test/SemaCXX/array-bounds-ptr-arith.cpp diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index cdfa11203e..8a3fabdef7 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -4035,11 +4035,17 @@ def err_out_of_line_default_deletes : Error< def err_defaulted_move_unsupported : Error< "defaulting move functions not yet supported">; +def warn_ptr_arith_precedes_bounds : Warning< + "the pointer decremented by %0 refers before the beginning of the array">, + InGroup>, DefaultIgnore; +def warn_ptr_arith_exceeds_bounds : Warning< + "the pointer incremented by %0 refers past the end of the array (that contains %1 element%s2)">, + InGroup>, DefaultIgnore; def warn_array_index_precedes_bounds : Warning< "array index of '%0' indexes before the beginning of the array">, InGroup>; def warn_array_index_exceeds_bounds : Warning< - "array index of '%0' indexes past the end of an array (that contains %1 elements)">, + "array index of '%0' indexes past the end of an array (that contains %1 element%s2)">, InGroup>; def note_array_index_out_of_bounds : Note< "array %0 declared here">; diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index 41d0e8a176..241a570b7f 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -5885,6 +5885,8 @@ public: unsigned ByteNo) const; private: + void CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, + bool isSubscript=false); void CheckArrayAccess(const Expr *E); bool CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); bool CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall); diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 25256b0407..32d90bffc2 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -3467,43 +3467,87 @@ void Sema::CheckCastAlign(Expr *Op, QualType T, SourceRange TRange) { << TRange << Op->getSourceRange(); } -static void CheckArrayAccess_Check(Sema &S, - const clang::ArraySubscriptExpr *E) { - const Expr *BaseExpr = E->getBase()->IgnoreParenImpCasts(); +static const Type* getElementType(const Expr *BaseExpr) { + const Type* EltType = BaseExpr->getType().getTypePtr(); + if (EltType->isAnyPointerType()) + return EltType->getPointeeType().getTypePtr(); + else if (EltType->isArrayType()) + return EltType->getBaseElementTypeUnsafe(); + return EltType; +} + +void Sema::CheckArrayAccess(const Expr *BaseExpr, const Expr *IndexExpr, + bool isSubscript) { + const Type* EffectiveType = getElementType(BaseExpr); + BaseExpr = BaseExpr->IgnoreParenCasts(); + IndexExpr = IndexExpr->IgnoreParenCasts(); + const ConstantArrayType *ArrayTy = - S.Context.getAsConstantArrayType(BaseExpr->getType()); + Context.getAsConstantArrayType(BaseExpr->getType()); if (!ArrayTy) return; - const Expr *IndexExpr = E->getIdx(); if (IndexExpr->isValueDependent()) return; llvm::APSInt index; - if (!IndexExpr->isIntegerConstantExpr(index, S.Context)) + if (!IndexExpr->isIntegerConstantExpr(index, Context)) return; if (index.isUnsigned() || !index.isNegative()) { llvm::APInt size = ArrayTy->getSize(); if (!size.isStrictlyPositive()) return; + + const Type* BaseType = getElementType(BaseExpr); + if (BaseType != EffectiveType) { + // Make sure we're comparing apples to apples when comparing index to size + uint64_t ptrarith_typesize = Context.getTypeSize(EffectiveType); + uint64_t array_typesize = Context.getTypeSize(BaseType); + if (ptrarith_typesize != array_typesize) { + // There's a cast to a different size type involved + uint64_t ratio = array_typesize / ptrarith_typesize; + if (ptrarith_typesize * ratio != array_typesize) + // If the size of the array's base type is not a multiple of the + // casted-to pointee type, the results of the pointer arithmetic + // may or may not point to something consistently meaningful or within a + // valid reference... + return; + + size *= llvm::APInt(size.getBitWidth(), ratio); + } + } + if (size.getBitWidth() > index.getBitWidth()) index = index.sext(size.getBitWidth()); else if (size.getBitWidth() < index.getBitWidth()) size = size.sext(index.getBitWidth()); - if (index.slt(size)) + // For array subscripting the index must be less than size, but for pointer + // arithmetic also allow the index (offset) to be equal to size since + // computing the next address after the end of the array is legal and + // commonly done e.g. in C++ iterators and range-based for loops. + if (isSubscript ? index.slt(size) : index.sle(size)) return; - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, - S.PDiag(diag::warn_array_index_exceeds_bounds) - << index.toString(10, true) - << size.toString(10, true) - << IndexExpr->getSourceRange()); + unsigned DiagID = diag::warn_ptr_arith_exceeds_bounds; + if (isSubscript) + DiagID = diag::warn_array_index_exceeds_bounds; + + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, + PDiag(DiagID) << index.toString(10, true) + << size.toString(10, true) + << (unsigned)size.getLimitedValue(~0U) + << IndexExpr->getSourceRange()); } else { - S.DiagRuntimeBehavior(E->getBase()->getLocStart(), BaseExpr, - S.PDiag(diag::warn_array_index_precedes_bounds) - << index.toString(10, true) - << IndexExpr->getSourceRange()); + unsigned DiagID = diag::warn_array_index_precedes_bounds; + if (!isSubscript) { + DiagID = diag::warn_ptr_arith_precedes_bounds; + if (index.isNegative()) index = -index; + } + + DiagRuntimeBehavior(BaseExpr->getLocStart(), BaseExpr, + PDiag(DiagID) << index.toString(10, true) + << IndexExpr->getSourceRange()); } const NamedDecl *ND = NULL; @@ -3512,18 +3556,21 @@ static void CheckArrayAccess_Check(Sema &S, if (const MemberExpr *ME = dyn_cast(BaseExpr)) ND = dyn_cast(ME->getMemberDecl()); if (ND) - S.DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, - S.PDiag(diag::note_array_index_out_of_bounds) - << ND->getDeclName()); + DiagRuntimeBehavior(ND->getLocStart(), BaseExpr, + PDiag(diag::note_array_index_out_of_bounds) + << ND->getDeclName()); } void Sema::CheckArrayAccess(const Expr *expr) { while (true) { expr = expr->IgnoreParens(); switch (expr->getStmtClass()) { - case Stmt::ArraySubscriptExprClass: - CheckArrayAccess_Check(*this, cast(expr)); + case Stmt::ArraySubscriptExprClass: { + const ArraySubscriptExpr *ASE = cast(expr); + CheckArrayAccess(ASE->getBase(), + ASE->getIdx(), true); return; + } case Stmt::ConditionalOperatorClass: { const ConditionalOperator *cond = cast(expr); if (const Expr *lhs = cond->getLHS()) diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 4a9b4bcfdf..bb7e3d8669 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -268,6 +268,7 @@ ExprResult Sema::DefaultFunctionArrayConversion(Expr *E) { E = ImpCastExprToType(E, Context.getPointerType(Ty), CK_FunctionToPointerDecay).take(); else if (Ty->isArrayType()) { + CheckArrayAccess(E); // In C90 mode, arrays only promote to pointers if the array expression is // an lvalue. The relevant legalese is C90 6.2.2.1p3: "an lvalue that has // type 'array of type' is converted to an expression that has type 'pointer @@ -310,6 +311,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // A glvalue of a non-function, non-array type T can be // converted to a prvalue. if (!E->isGLValue()) return Owned(E); + QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); @@ -385,6 +387,14 @@ ExprResult Sema::UsualUnaryConversions(Expr *E) { QualType Ty = E->getType(); assert(!Ty.isNull() && "UsualUnaryConversions - missing type"); + if (Ty->isPointerType() || Ty->isArrayType()) { + Expr *subE = E->IgnoreParenImpCasts(); + while (UnaryOperator *UO = dyn_cast(subE)) { + subE = UO->getSubExpr()->IgnoreParenImpCasts(); + } + if (subE) CheckArrayAccess(subE); + } + // Try to perform integral promotions if the object has a theoretically // promotable type. if (Ty->isIntegralOrUnscopedEnumerationType()) { @@ -5812,6 +5822,8 @@ QualType Sema::CheckAdditionOperands( // C99 6.5.6 return QualType(); } + CheckArrayAccess(PExp, IExp); + if (CompLHSTy) { QualType LHSTy = Context.isPromotableBitField(lex.get()); if (LHSTy.isNull()) { @@ -5866,6 +5878,11 @@ QualType Sema::CheckSubtractionOperands(ExprResult &lex, ExprResult &rex, if (!checkArithmeticOpPointerOperand(*this, Loc, lex.get())) return QualType(); + Expr *IExpr = rex.get()->IgnoreParenCasts(); + UnaryOperator negRex(IExpr, UO_Minus, IExpr->getType(), VK_RValue, + OK_Ordinary, IExpr->getExprLoc()); + CheckArrayAccess(lex.get()->IgnoreParenCasts(), &negRex); + if (CompLHSTy) *CompLHSTy = lex.get()->getType(); return lex.get()->getType(); } diff --git a/test/SemaCXX/array-bounds-ptr-arith.cpp b/test/SemaCXX/array-bounds-ptr-arith.cpp new file mode 100644 index 0000000000..ce1ace6f2f --- /dev/null +++ b/test/SemaCXX/array-bounds-ptr-arith.cpp @@ -0,0 +1,33 @@ +// RUN: %clang_cc1 -verify -Warray-bounds-pointer-arithmetic %s + +void swallow (const char *x) { (void)x; } +void test_pointer_arithmetic(int n) { + const char hello[] = "Hello world!"; // expected-note 2 {{declared here}} + const char *helloptr = hello; + + swallow("Hello world!" + 6); // no-warning + swallow("Hello world!" - 6); // expected-warning {{refers before the beginning of the array}} + swallow("Hello world!" + 14); // expected-warning {{refers past the end of the array}} + swallow("Hello world!" + 13); // no-warning + + swallow(hello + 6); // no-warning + swallow(hello - 6); // expected-warning {{refers before the beginning of the array}} + swallow(hello + 14); // expected-warning {{refers past the end of the array}} + swallow(hello + 13); // no-warning + + swallow(helloptr + 6); // no-warning + swallow(helloptr - 6); // no-warning + swallow(helloptr + 14); // no-warning + swallow(helloptr + 13); // no-warning + + double numbers[2]; // expected-note {{declared here}} + swallow((char*)numbers + sizeof(double)); // no-warning + swallow((char*)numbers + 60); // expected-warning {{refers past the end of the array}} + + char buffer[5]; // expected-note 2 {{declared here}} + // TODO: Add FixIt notes for adding parens around non-ptr part of arith expr + swallow(buffer + sizeof("Hello")-1); // expected-warning {{refers past the end of the array}} + swallow(buffer + (sizeof("Hello")-1)); // no-warning + if (n > 0 && n <= 6) swallow(buffer + 6 - n); // expected-warning {{refers past the end of the array}} + if (n > 0 && n <= 6) swallow(buffer + (6 - n)); // no-warning +} diff --git a/test/SemaCXX/array-bounds.cpp b/test/SemaCXX/array-bounds.cpp index 3bd6c35420..f7bb7dd54c 100644 --- a/test/SemaCXX/array-bounds.cpp +++ b/test/SemaCXX/array-bounds.cpp @@ -25,7 +25,7 @@ void f1(int a[1]) { } void f2(const int (&a)[1]) { // expected-note {{declared here}} - int val = a[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 elements)}} + int val = a[3]; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}} } void test() { @@ -35,15 +35,17 @@ void test() { s2.a[3] = 0; // no warning for 0-sized array union { - short a[2]; // expected-note {{declared here}} + short a[2]; // expected-note 2 {{declared here}} char c[4]; } u; u.a[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}} u.c[3] = 1; // no warning + *(&u.a[3]) = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 2 elements)}} + *(&u.c[3]) = 1; // no warning const int const_subscript = 3; int array[1]; // expected-note {{declared here}} - array[const_subscript] = 0; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 elements)}} + array[const_subscript] = 0; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}} int *ptr; ptr[3] = 0; // no warning for pointer references @@ -59,7 +61,7 @@ void test() { char c2 = str2[5]; // expected-warning {{array index of '5' indexes past the end of an array (that contains 4 elements)}} int (*array_ptr)[1]; - (*array_ptr)[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 elements)}} + (*array_ptr)[3] = 1; // expected-warning {{array index of '3' indexes past the end of an array (that contains 1 element)}} } template struct S { @@ -151,8 +153,7 @@ void test_switch() { enum enumA { enumA_A, enumA_B, enumA_C, enumA_D, enumA_E }; enum enumB { enumB_X, enumB_Y, enumB_Z }; static enum enumB myVal = enumB_X; -void test_nested_switch() -{ +void test_nested_switch() { switch (enumA_E) { // expected-warning {{no case matching constant}} switch (myVal) { // expected-warning {{enumeration values 'enumB_X' and 'enumB_Z' not handled in switch}} case enumB_Y: ; -- 2.40.0