From: Ted Kremenek Date: Fri, 26 Feb 2010 19:18:41 +0000 (+0000) Subject: For printf format string checking, move the tracking of the data argument index out of X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7f70dc85d5055c19c8003f43a59135de211ad1b9;p=clang For printf format string checking, move the tracking of the data argument index out of Sema and into analyze_printf::ParseFormatString(). Also use a bitvector to determine what arguments have been covered (instead of just checking to see if the last argument consumed is the max argument). This is prep. for support positional arguments (an IEEE extension). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@97248 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Analysis/Analyses/PrintfFormatString.h b/include/clang/Analysis/Analyses/PrintfFormatString.h index f1e8220908..f66892f185 100644 --- a/include/clang/Analysis/Analyses/PrintfFormatString.h +++ b/include/clang/Analysis/Analyses/PrintfFormatString.h @@ -152,18 +152,21 @@ class OptionalAmount { public: enum HowSpecified { NotSpecified, Constant, Arg }; - OptionalAmount(HowSpecified h, const char *st) - : start(st), hs(h), amt(0) {} + OptionalAmount(HowSpecified h, unsigned i, const char *st) + : start(st), hs(h), amt(i) {} OptionalAmount() : start(0), hs(NotSpecified), amt(0) {} - OptionalAmount(unsigned i, const char *st) - : start(st), hs(Constant), amt(i) {} - HowSpecified getHowSpecified() const { return hs; } + bool hasDataArgument() const { return hs == Arg; } + unsigned getArgIndex() const { + assert(hasDataArgument()); + return amt; + } + unsigned getConstantAmount() const { assert(hs == Constant); return amt; @@ -188,14 +191,14 @@ class FormatSpecifier { unsigned HasSpacePrefix : 1; unsigned HasAlternativeForm : 1; unsigned HasLeadingZeroes : 1; - unsigned flags : 5; + unsigned argIndex; ConversionSpecifier CS; OptionalAmount FieldWidth; OptionalAmount Precision; public: FormatSpecifier() : LM(None), IsLeftJustified(0), HasPlusPrefix(0), HasSpacePrefix(0), - HasAlternativeForm(0), HasLeadingZeroes(0) {} + HasAlternativeForm(0), HasLeadingZeroes(0), argIndex(0) {} static FormatSpecifier Parse(const char *beg, const char *end); @@ -212,6 +215,16 @@ public: void setHasAlternativeForm() { HasAlternativeForm = 1; } void setHasLeadingZeros() { HasLeadingZeroes = 1; } + void setArgIndex(unsigned i) { + assert(CS.consumesDataArgument()); + argIndex = i; + } + + unsigned getArgIndex() const { + assert(CS.consumesDataArgument()); + return argIndex; + } + // Methods for querying the format specifier. const ConversionSpecifier &getConversionSpecifier() const { @@ -262,10 +275,10 @@ public: virtual void HandleNullChar(const char *nullCharacter) {} - virtual void + virtual bool HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, - unsigned specifierLen) {} + unsigned specifierLen) { return true; } virtual bool HandleFormatSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td index 52fbbcbb00..3d9253db33 100644 --- a/include/clang/Basic/DiagnosticSemaKinds.td +++ b/include/clang/Basic/DiagnosticSemaKinds.td @@ -2521,8 +2521,8 @@ def warn_printf_write_back : Warning< InGroup; def warn_printf_insufficient_data_args : Warning< "more '%%' conversions than data arguments">, InGroup; -def warn_printf_too_many_data_args : Warning< - "more data arguments than format specifiers">, InGroup; +def warn_printf_data_arg_not_used : Warning< + "data argument not used by format string">, InGroup; def warn_printf_invalid_conversion : Warning< "invalid conversion specifier '%0'">, InGroup; def warn_printf_incomplete_specifier : Warning< diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp index 3aee57cb81..9c0f813964 100644 --- a/lib/Analysis/PrintfFormatString.cpp +++ b/lib/Analysis/PrintfFormatString.cpp @@ -62,7 +62,8 @@ public: // Methods for parsing format strings. //===----------------------------------------------------------------------===// -static OptionalAmount ParseAmount(const char *&Beg, const char *E) { +static OptionalAmount ParseAmount(const char *&Beg, const char *E, + unsigned &argIndex) { const char *I = Beg; UpdateOnReturn UpdateBeg(Beg, I); @@ -78,11 +79,11 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) { } if (foundDigits) - return OptionalAmount(accumulator, Beg); + return OptionalAmount(OptionalAmount::Constant, accumulator, Beg); if (c == '*') { ++I; - return OptionalAmount(OptionalAmount::Arg, Beg); + return OptionalAmount(OptionalAmount::Arg, argIndex++, Beg); } break; @@ -93,7 +94,8 @@ static OptionalAmount ParseAmount(const char *&Beg, const char *E) { static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, const char *&Beg, - const char *E) { + const char *E, + unsigned &argIndex) { using namespace clang::analyze_printf; @@ -149,7 +151,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, } // Look for the field width (if any). - FS.setFieldWidth(ParseAmount(I, E)); + FS.setFieldWidth(ParseAmount(I, E, argIndex)); if (I == E) { // No more characters left? @@ -165,7 +167,7 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, return true; } - FS.setPrecision(ParseAmount(I, E)); + FS.setPrecision(ParseAmount(I, E, argIndex)); if (I == E) { // No more characters left? @@ -241,20 +243,26 @@ static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H, // Glibc specific. case 'm': k = ConversionSpecifier::PrintErrno; break; } - FS.setConversionSpecifier(ConversionSpecifier(conversionPosition, k)); + ConversionSpecifier CS(conversionPosition, k); + FS.setConversionSpecifier(CS); + if (CS.consumesDataArgument()) + FS.setArgIndex(argIndex++); if (k == ConversionSpecifier::InvalidSpecifier) { - H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); - return false; // Keep processing format specifiers. + // Assume the conversion takes one argument. + return !H.HandleInvalidConversionSpecifier(FS, Beg, I - Beg); } return FormatSpecifierResult(Start, FS); } bool clang::analyze_printf::ParseFormatString(FormatStringHandler &H, const char *I, const char *E) { + + unsigned argIndex = 0; + // Keep looking for a format specifier until we have exhausted the string. while (I != E) { - const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E); + const FormatSpecifierResult &FSR = ParseFormatSpecifier(H, I, E, argIndex); // Did a fail-stop error of any kind occur when parsing the specifier? // If so, don't do any more processing. if (FSR.shouldStop()) @@ -430,7 +438,7 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { return Ctx.LongDoubleTy; return Ctx.DoubleTy; } - + switch (CS.getKind()) { case ConversionSpecifier::CStrArg: return ArgTypeResult(LM == AsWideChar ? ArgTypeResult::WCStrTy : ArgTypeResult::CStrTy); @@ -438,11 +446,11 @@ ArgTypeResult FormatSpecifier::getArgType(ASTContext &Ctx) const { // FIXME: This appears to be Mac OS X specific. return ArgTypeResult::WCStrTy; case ConversionSpecifier::CArg: - return Ctx.WCharTy; + return Ctx.WCharTy; default: break; } - + // FIXME: Handle other cases. return ArgTypeResult(); } diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 019b79811d..2f71c778ec 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -955,6 +955,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// FormatGuard: Automatic Protection From printf Format String /// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. /// +/// TODO: /// Functionality implemented: /// /// We can statically check the following properties for string @@ -965,7 +966,7 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// data arguments? /// /// (2) Does each format conversion correctly match the type of the -/// corresponding data argument? (TODO) +/// corresponding data argument? /// /// Moreover, for all printf functions we can: /// @@ -984,7 +985,6 @@ Sema::CheckNonNullArguments(const NonNullAttr *NonNull, /// /// All of these checks can be done by parsing the format string. /// -/// For now, we ONLY do (1), (3), (5), (6), (7), and (8). void Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg, unsigned format_idx, unsigned firstDataArg) { @@ -1044,13 +1044,13 @@ class CheckPrintfHandler : public analyze_printf::FormatStringHandler { Sema &S; const StringLiteral *FExpr; const Expr *OrigFormatExpr; - unsigned NumConversions; const unsigned NumDataArgs; const bool IsObjCLiteral; const char *Beg; // Start of format string. const bool HasVAListArg; const CallExpr *TheCall; unsigned FormatIdx; + llvm::BitVector CoveredArgs; public: CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, const Expr *origFormatExpr, @@ -1058,17 +1058,20 @@ public: const char *beg, bool hasVAListArg, const CallExpr *theCall, unsigned formatIdx) : S(s), FExpr(fexpr), OrigFormatExpr(origFormatExpr), - NumConversions(0), NumDataArgs(numDataArgs), + NumDataArgs(numDataArgs), IsObjCLiteral(isObjCLiteral), Beg(beg), HasVAListArg(hasVAListArg), - TheCall(theCall), FormatIdx(formatIdx) {} + TheCall(theCall), FormatIdx(formatIdx) { + CoveredArgs.resize(numDataArgs); + CoveredArgs.reset(); + } void DoneProcessing(); void HandleIncompleteFormatSpecifier(const char *startSpecifier, unsigned specifierLen); - void + bool HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen); @@ -1117,18 +1120,35 @@ HandleIncompleteFormatSpecifier(const char *startSpecifier, << getFormatSpecifierRange(startSpecifier, specifierLen); } -void CheckPrintfHandler:: +bool CheckPrintfHandler:: HandleInvalidConversionSpecifier(const analyze_printf::FormatSpecifier &FS, const char *startSpecifier, unsigned specifierLen) { - ++NumConversions; + unsigned argIndex = FS.getArgIndex(); + bool keepGoing = true; + if (argIndex < NumDataArgs) { + // Consider the argument coverered, even though the specifier doesn't + // make sense. + CoveredArgs.set(argIndex); + } + else { + // If argIndex exceeds the number of data arguments we + // don't issue a warning because that is just a cascade of warnings (and + // they may have intended '%%' anyway). We don't want to continue processing + // the format string after this point, however, as we will like just get + // gibberish when trying to match arguments. + keepGoing = false; + } + const analyze_printf::ConversionSpecifier &CS = FS.getConversionSpecifier(); SourceLocation Loc = getLocationOfByte(CS.getStart()); S.Diag(Loc, diag::warn_printf_invalid_conversion) << llvm::StringRef(CS.getStart(), CS.getLength()) << getFormatSpecifierRange(startSpecifier, specifierLen); + + return keepGoing; } void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { @@ -1139,7 +1159,7 @@ void CheckPrintfHandler::HandleNullChar(const char *nullCharacter) { } const Expr *CheckPrintfHandler::getDataArg(unsigned i) const { - return TheCall->getArg(FormatIdx + i); + return TheCall->getArg(FormatIdx + i + 1); } @@ -1162,9 +1182,9 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, unsigned specifierLen) { if (Amt.hasDataArgument()) { - ++NumConversions; if (!HasVAListArg) { - if (NumConversions > NumDataArgs) { + unsigned argIndex = Amt.getArgIndex(); + if (argIndex >= NumDataArgs) { S.Diag(getLocationOfByte(Amt.getStart()), MissingArgDiag) << getFormatSpecifierRange(startSpecifier, specifierLen); // Don't do any more checking. We will just emit @@ -1176,7 +1196,8 @@ CheckPrintfHandler::HandleAmount(const analyze_printf::OptionalAmount &Amt, // Although not in conformance with C99, we also allow the argument to be // an 'unsigned int' as that is a reasonably safe case. GCC also // doesn't emit a warning for that case. - const Expr *Arg = getDataArg(NumConversions); + CoveredArgs.set(argIndex); + const Expr *Arg = getDataArg(argIndex); QualType T = Arg->getType(); const analyze_printf::ArgTypeResult &ATR = Amt.getArgType(S.Context); @@ -1221,22 +1242,21 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier return false; } - // Check for using an Objective-C specific conversion specifier - // in a non-ObjC literal. - if (!IsObjCLiteral && CS.isObjCArg()) { - HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen); - - // Continue checking the other format specifiers. - return true; - } - if (!CS.consumesDataArgument()) { // FIXME: Technically specifying a precision or field width here // makes no sense. Worth issuing a warning at some point. return true; } - ++NumConversions; + // Consume the argument. + unsigned argIndex = FS.getArgIndex(); + CoveredArgs.set(argIndex); + + // Check for using an Objective-C specific conversion specifier + // in a non-ObjC literal. + if (!IsObjCLiteral && CS.isObjCArg()) { + return HandleInvalidConversionSpecifier(FS, startSpecifier, specifierLen); + } // Are we using '%n'? Issue a warning about this being // a possible security issue. @@ -1270,7 +1290,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier if (HasVAListArg) return true; - if (NumConversions > NumDataArgs) { + if (argIndex >= NumDataArgs) { S.Diag(getLocationOfByte(CS.getStart()), diag::warn_printf_insufficient_data_args) << getFormatSpecifierRange(startSpecifier, specifierLen); @@ -1280,7 +1300,7 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier // Now type check the data expression that matches the // format specifier. - const Expr *Ex = getDataArg(NumConversions); + const Expr *Ex = getDataArg(argIndex); const analyze_printf::ArgTypeResult &ATR = FS.getArgType(S.Context); if (ATR.isValid() && !ATR.matchesType(S.Context, Ex->getType())) { // Check if we didn't match because of an implicit cast from a 'char' @@ -1304,10 +1324,17 @@ CheckPrintfHandler::HandleFormatSpecifier(const analyze_printf::FormatSpecifier void CheckPrintfHandler::DoneProcessing() { // Does the number of data arguments exceed the number of // format conversions in the format string? - if (!HasVAListArg && NumConversions < NumDataArgs) - S.Diag(getDataArg(NumConversions+1)->getLocStart(), - diag::warn_printf_too_many_data_args) - << getFormatStringRange(); + if (!HasVAListArg) { + // Find any arguments that weren't covered. + CoveredArgs.flip(); + signed notCoveredArg = CoveredArgs.find_first(); + if (notCoveredArg >= 0) { + assert((unsigned)notCoveredArg < NumDataArgs); + S.Diag(getDataArg((unsigned) notCoveredArg)->getLocStart(), + diag::warn_printf_data_arg_not_used) + << getFormatStringRange(); + } + } } void Sema::CheckPrintfString(const StringLiteral *FExpr, diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c index e2686a885e..191996004d 100644 --- a/test/Sema/format-strings.c +++ b/test/Sema/format-strings.c @@ -55,7 +55,7 @@ void check_conditional_literal(const char* s, int i) { printf(i == 1 ? "yes" : "no"); // no-warning printf(i == 0 ? (i == 1 ? "yes" : "no") : "dont know"); // no-warning printf(i == 0 ? (i == 1 ? s : "no") : "dont know"); // expected-warning{{format string is not a string literal}} - printf("yes" ?: "no %d", 1); // expected-warning{{more data arguments than format specifiers}} + printf("yes" ?: "no %d", 1); // expected-warning{{data argument not used by format string}} } void check_writeback_specifier() @@ -155,7 +155,7 @@ void test10(int x, float f, int i, long long lli) { printf("%**\n"); // expected-warning{{invalid conversion specifier '*'}} printf("%n", &i); // expected-warning{{use of '%n' in format string discouraged (potentially insecure)}} printf("%d%d\n", x); // expected-warning{{more '%' conversions than data arguments}} - printf("%d\n", x, x); // expected-warning{{more data arguments than format specifiers}} + printf("%d\n", x, x); // expected-warning{{data argument not used by format string}} printf("%W%d%Z\n", x, x, x); // expected-warning{{invalid conversion specifier 'W'}} expected-warning{{invalid conversion specifier 'Z'}} printf("%"); // expected-warning{{incomplete format specifier}} printf("%.d", x); // no-warning