From f883b21a96192ffd3c1284fa74a8b7d7c4c8dd8c Mon Sep 17 00:00:00 2001 From: Andy Gibbs Date: Fri, 26 Feb 2016 15:35:16 +0000 Subject: [PATCH] Reduce false positives in printf/scanf format checker Summary: The printf/scanf format checker is a little over-zealous in handling the conditional operator. This patch reduces work by not checking code-paths that are never used and reduces false positives regarding uncovered arguments, for example in the code fragment: printf(minimal ? "%i\n" : "%i: %s\n", code, msg); Reviewers: rtrieu Subscribers: cfe-commits Differential Revision: http://reviews.llvm.org/D15636 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@262025 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Sema/SemaChecking.cpp | 179 +++++++++++++++++++++++++------ test/Sema/format-strings-scanf.c | 14 ++- test/Sema/format-strings.c | 19 +++- 3 files changed, 176 insertions(+), 36 deletions(-) diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 134248dbe2..c38dda6464 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -3256,6 +3256,51 @@ bool Sema::SemaBuiltinSetjmp(CallExpr *TheCall) { } namespace { +class UncoveredArgHandler { + enum { Unknown = -1, AllCovered = -2 }; + signed FirstUncoveredArg; + SmallVector DiagnosticExprs; + +public: + UncoveredArgHandler() : FirstUncoveredArg(Unknown) { } + + bool hasUncoveredArg() const { + return (FirstUncoveredArg >= 0); + } + + unsigned getUncoveredArg() const { + assert(hasUncoveredArg() && "no uncovered argument"); + return FirstUncoveredArg; + } + + void setAllCovered() { + // A string has been found with all arguments covered, so clear out + // the diagnostics. + DiagnosticExprs.clear(); + FirstUncoveredArg = AllCovered; + } + + void Update(signed NewFirstUncoveredArg, const Expr *StrExpr) { + assert(NewFirstUncoveredArg >= 0 && "Outside range"); + + // Don't update if a previous string covers all arguments. + if (FirstUncoveredArg == AllCovered) + return; + + // UncoveredArgHandler tracks the highest uncovered argument index + // and with it all the strings that match this index. + if (NewFirstUncoveredArg == FirstUncoveredArg) + DiagnosticExprs.push_back(StrExpr); + else if (NewFirstUncoveredArg > FirstUncoveredArg) { + DiagnosticExprs.clear(); + DiagnosticExprs.push_back(StrExpr); + FirstUncoveredArg = NewFirstUncoveredArg; + } + } + + void Diagnose(Sema &S, bool IsFunctionCall, const Expr *ArgExpr); +}; + enum StringLiteralCheckType { SLCT_NotALiteral, SLCT_UncheckedLiteral, @@ -3271,7 +3316,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr, Sema::FormatStringType Type, bool inFunctionCall, Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs); + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg); // Determine if an expression is a string literal or constant string. // If this function returns false on the arguments to a function expecting a @@ -3282,7 +3328,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, - llvm::SmallBitVector &CheckedVarArgs) { + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) { tryAgain: if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3303,17 +3350,39 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, // completely checked only if both sub-expressions were checked. const AbstractConditionalOperator *C = cast(E); - StringLiteralCheckType Left = - checkFormatStringExpr(S, C->getTrueExpr(), Args, - HasVAListArg, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs); - if (Left == SLCT_NotALiteral) - return SLCT_NotALiteral; + + // Determine whether it is necessary to check both sub-expressions, for + // example, because the condition expression is a constant that can be + // evaluated at compile time. + bool CheckLeft = true, CheckRight = true; + + bool Cond; + if (C->getCond()->EvaluateAsBooleanCondition(Cond, S.getASTContext())) { + if (Cond) + CheckRight = false; + else + CheckLeft = false; + } + + StringLiteralCheckType Left; + if (!CheckLeft) + Left = SLCT_UncheckedLiteral; + else { + Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, + HasVAListArg, format_idx, firstDataArg, + Type, CallType, InFunctionCall, + CheckedVarArgs, UncoveredArg); + if (Left == SLCT_NotALiteral || !CheckRight) + return Left; + } + StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, - Type, CallType, InFunctionCall, CheckedVarArgs); - return Left < Right ? Left : Right; + Type, CallType, InFunctionCall, CheckedVarArgs, + UncoveredArg); + + return (CheckLeft && Left < Right) ? Left : Right; } case Stmt::ImplicitCastExprClass: { @@ -3364,7 +3433,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs); + /*InFunctionCall*/false, CheckedVarArgs, + UncoveredArg); } } @@ -3419,7 +3489,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs); + CheckedVarArgs, UncoveredArg); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -3428,7 +3498,8 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - InFunctionCall, CheckedVarArgs); + InFunctionCall, CheckedVarArgs, + UncoveredArg); } } } @@ -3447,7 +3518,7 @@ checkFormatStringExpr(Sema &S, const Expr *E, ArrayRef Args, if (StrE) { CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, - CheckedVarArgs); + CheckedVarArgs, UncoveredArg); return SLCT_CheckedLiteral; } @@ -3515,10 +3586,20 @@ bool Sema::CheckFormatArguments(ArrayRef Args, // C string (e.g. "%d") // ObjC string uses the same format specifiers as C string, so we can use // the same format string checking logic for both ObjC and C strings. + UncoveredArgHandler UncoveredArg; StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/true, CheckedVarArgs); + /*IsFunctionCall*/true, CheckedVarArgs, + UncoveredArg); + + // Generate a diagnostic where an uncovered argument is detected. + if (UncoveredArg.hasUncoveredArg()) { + unsigned ArgIdx = UncoveredArg.getUncoveredArg() + firstDataArg; + assert(ArgIdx < Args.size() && "ArgIdx outside bounds"); + UncoveredArg.Diagnose(*this, /*IsFunctionCall*/true, Args[ArgIdx]); + } + if (CT != SLCT_NotALiteral) // Literal format string found, check done! return CT == SLCT_CheckedLiteral; @@ -3567,6 +3648,7 @@ protected: bool inFunctionCall; Sema::VariadicCallType CallType; llvm::SmallBitVector &CheckedVarArgs; + UncoveredArgHandler &UncoveredArg; public: CheckFormatHandler(Sema &s, const StringLiteral *fexpr, @@ -3575,14 +3657,15 @@ public: ArrayRef Args, unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType callType, - llvm::SmallBitVector &CheckedVarArgs) + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), FirstDataArg(firstDataArg), NumDataArgs(numDataArgs), Beg(beg), HasVAListArg(hasVAListArg), Args(Args), FormatIdx(formatIdx), usesPositionalArgs(false), atFirstArg(true), inFunctionCall(inFunctionCall), CallType(callType), - CheckedVarArgs(CheckedVarArgs) { + CheckedVarArgs(CheckedVarArgs), UncoveredArg(UncoveredArg) { CoveredArgs.resize(numDataArgs); CoveredArgs.reset(); } @@ -3813,26 +3896,44 @@ const Expr *CheckFormatHandler::getDataArg(unsigned i) const { } void CheckFormatHandler::DoneProcessing() { - // Does the number of data arguments exceed the number of - // format conversions in the format string? + // Does the number of data arguments exceed the number of + // format conversions in the format string? if (!HasVAListArg) { // Find any arguments that weren't covered. CoveredArgs.flip(); signed notCoveredArg = CoveredArgs.find_first(); if (notCoveredArg >= 0) { assert((unsigned)notCoveredArg < NumDataArgs); - if (const Expr *E = getDataArg((unsigned) notCoveredArg)) { - SourceLocation Loc = E->getLocStart(); - if (!S.getSourceManager().isInSystemMacro(Loc)) { - EmitFormatDiagnostic(S.PDiag(diag::warn_printf_data_arg_not_used), - Loc, /*IsStringLocation*/false, - getFormatStringRange()); - } - } + UncoveredArg.Update(notCoveredArg, OrigFormatExpr); + } else { + UncoveredArg.setAllCovered(); } } } +void UncoveredArgHandler::Diagnose(Sema &S, bool IsFunctionCall, + const Expr *ArgExpr) { + assert(hasUncoveredArg() && DiagnosticExprs.size() > 0 && + "Invalid state"); + + if (!ArgExpr) + return; + + SourceLocation Loc = ArgExpr->getLocStart(); + + if (S.getSourceManager().isInSystemMacro(Loc)) + return; + + PartialDiagnostic PDiag = S.PDiag(diag::warn_printf_data_arg_not_used); + for (auto E : DiagnosticExprs) + PDiag << E->getSourceRange(); + + CheckFormatHandler::EmitFormatDiagnostic( + S, IsFunctionCall, DiagnosticExprs[0], + PDiag, Loc, /*IsStringLocation*/false, + DiagnosticExprs[0]->getSourceRange()); +} + bool CheckFormatHandler::HandleInvalidConversionSpecifier(unsigned argIndex, SourceLocation Loc, @@ -3886,6 +3987,10 @@ CheckFormatHandler::CheckNumArgs( EmitFormatDiagnostic( PDiag, getLocationOfByte(CS.getStart()), /*IsStringLocation*/true, getSpecifierRange(startSpecifier, specifierLen)); + + // Since more arguments than conversion tokens are given, by extension + // all arguments are covered, so mark this as so. + UncoveredArg.setAllCovered(); return false; } return true; @@ -3967,10 +4072,12 @@ public: ArrayRef Args, unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs) + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, numDataArgs, beg, hasVAListArg, Args, - formatIdx, inFunctionCall, CallType, CheckedVarArgs), + formatIdx, inFunctionCall, CallType, CheckedVarArgs, + UncoveredArg), ObjCContext(isObjC) {} @@ -4751,11 +4858,12 @@ public: ArrayRef Args, unsigned formatIdx, bool inFunctionCall, Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs) + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) : CheckFormatHandler(s, fexpr, origFormatExpr, firstDataArg, numDataArgs, beg, hasVAListArg, Args, formatIdx, inFunctionCall, CallType, - CheckedVarArgs) + CheckedVarArgs, UncoveredArg) {} bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, @@ -4923,7 +5031,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr, Sema::FormatStringType Type, bool inFunctionCall, Sema::VariadicCallType CallType, - llvm::SmallBitVector &CheckedVarArgs) { + llvm::SmallBitVector &CheckedVarArgs, + UncoveredArgHandler &UncoveredArg) { // CHECK: is the format string a wide literal? if (!FExpr->isAscii() && !FExpr->isUTF8()) { CheckFormatHandler::EmitFormatDiagnostic( @@ -4971,7 +5080,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr, numDataArgs, (Type == Sema::FST_NSString || Type == Sema::FST_OSTrace), Str, HasVAListArg, Args, format_idx, - inFunctionCall, CallType, CheckedVarArgs); + inFunctionCall, CallType, CheckedVarArgs, + UncoveredArg); if (!analyze_format_string::ParsePrintfString(H, Str, Str + StrLen, S.getLangOpts(), @@ -4981,7 +5091,8 @@ static void CheckFormatString(Sema &S, const StringLiteral *FExpr, } else if (Type == Sema::FST_Scanf) { CheckScanfHandler H(S, FExpr, OrigFormatExpr, firstDataArg, numDataArgs, Str, HasVAListArg, Args, format_idx, - inFunctionCall, CallType, CheckedVarArgs); + inFunctionCall, CallType, CheckedVarArgs, + UncoveredArg); if (!analyze_format_string::ParseScanfString(H, Str, Str + StrLen, S.getLangOpts(), diff --git a/test/Sema/format-strings-scanf.c b/test/Sema/format-strings-scanf.c index d3a03adf61..7a92842b24 100644 --- a/test/Sema/format-strings-scanf.c +++ b/test/Sema/format-strings-scanf.c @@ -18,7 +18,7 @@ int vfscanf(FILE * restrict, const char * restrict, va_list); int vsscanf(const char * restrict, const char * restrict, va_list); void test(const char *s, int *i) { - scanf(s, i); // expected-warning{{ormat string is not a string literal}} + scanf(s, i); // expected-warning{{format string is not a string literal}} scanf("%0d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%00d", i); // expected-warning{{zero field width in scanf format string is unused}} scanf("%d%[asdfasdfd", i, s); // expected-warning{{no closing ']' for '%[' in scanf format string}} @@ -171,3 +171,15 @@ void test_qualifiers(const int *cip, volatile int* vip, scanf("%d", (ip_t)0); // No warning. scanf("%d", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}} } + +void check_conditional_literal(char *s, int *i) { + scanf(0 ? "%s" : "%d", i); // no warning + scanf(1 ? "%s" : "%d", i); // expected-warning{{format specifies type 'char *'}} + scanf(0 ? "%d %d" : "%d", i); // no warning + scanf(1 ? "%d %d" : "%d", i); // expected-warning{{more '%' conversions than data arguments}} + scanf(0 ? "%d %d" : "%d", i, s); // expected-warning{{data argument not used}} + scanf(1 ? "%d %s" : "%d", i, s); // no warning + scanf(i ? "%d %s" : "%d", i, s); // no warning + scanf(i ? "%d" : "%d", i, s); // expected-warning{{data argument not used}} + scanf(i ? "%s" : "%d", s); // expected-warning{{format specifies type 'int *'}} +} diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c index 0d827e400d..69723f6e1a 100644 --- a/test/Sema/format-strings.c +++ b/test/Sema/format-strings.c @@ -46,6 +46,9 @@ void check_string_literal( FILE* fp, const char* s, char *buf, ... ) { vscanf(s, ap); // expected-warning {{format string is not a string literal}} + const char *const fmt = "%d"; // FIXME -- defined here + printf(fmt, 1, 2); // expected-warning{{data argument not used}} + // rdar://6079877 printf("abc" "%*d", 1, 1); // no-warning @@ -86,6 +89,20 @@ void check_conditional_literal(const char* s, int i) { printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}} printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}} + printf(0 ? "yes %s" : "no %d", 1); // no-warning + printf(0 ? "yes %d" : "no %s", 1); // expected-warning{{format specifies type 'char *'}} + + printf(0 ? "yes" : "no %d", 1); // no-warning + printf(0 ? "yes %d" : "no", 1); // expected-warning{{data argument not used by format string}} + printf(1 ? "yes" : "no %d", 1); // expected-warning{{data argument not used by format string}} + printf(1 ? "yes %d" : "no", 1); // no-warning + printf(i ? "yes" : "no %d", 1); // no-warning + printf(i ? "yes %s" : "no %d", 1); // expected-warning{{format specifies type 'char *'}} + printf(i ? "yes" : "no %d", 1, 2); // expected-warning{{data argument not used by format string}} + + printf(i ? "%*s" : "-", i, s); // no-warning + printf(i ? "yes" : 0 ? "no %*d" : "dont know %d", 1, 2); // expected-warning{{data argument not used by format string}} + printf(i ? "%i\n" : "%i %s %s\n", i, s); // expected-warning{{more '%' conversions than data arguments}} } void check_writeback_specifier() @@ -519,7 +536,7 @@ void pr9751() { // Make sure that the "format string is defined here" note is not emitted // when the original string is within the argument expression. - printf(1 ? "yes %d" : "no %d"); // expected-warning 2{{more '%' conversions than data arguments}} + printf(1 ? "yes %d" : "no %d"); // expected-warning{{more '%' conversions than data arguments}} const char kFormat17[] = "%hu"; // expected-note{{format string is defined here}}} printf(kFormat17, (int[]){0}); // expected-warning{{format specifies type 'unsigned short' but the argument}} -- 2.40.0