From: Chris Lattner Date: Fri, 10 Aug 2007 20:18:51 +0000 (+0000) Subject: initial support for checking format strings, patch by Ted Kremenek: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=59907c4d8f6fc8aacfdaa0273bd7a9c140fbb45f;p=clang initial support for checking format strings, patch by Ted Kremenek: "I've coded up some support in clang to flag warnings for non-constant format strings used in calls to printf-like functions (all the functions listed in "man fprintf"). Non-constant format strings are a source of many security exploits in C/C++ programs, and I believe are currently detected by gcc using the flag -Wformat-nonliteral." git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@41003 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/Sema/Sema.cpp b/Sema/Sema.cpp index 64c18a80c0..76771d0d5c 100644 --- a/Sema/Sema.cpp +++ b/Sema/Sema.cpp @@ -16,10 +16,26 @@ #include "clang/AST/ASTContext.h" #include "clang/Lex/Preprocessor.h" #include "clang/Basic/Diagnostic.h" + using namespace clang; Sema::Sema(Preprocessor &pp, ASTContext &ctxt, std::vector &prevInGroup) : PP(pp), Context(ctxt), CurFunctionDecl(0), LastInGroupList(prevInGroup) { + + // Get IdentifierInfo objects for known functions for which we + // do extra checking. + IdentifierTable& IT = PP.getIdentifierTable(); + + KnownFunctionIDs[ id_printf ] = &IT.get("printf"); + KnownFunctionIDs[ id_fprintf ] = &IT.get("fprintf"); + KnownFunctionIDs[ id_sprintf ] = &IT.get("sprintf"); + KnownFunctionIDs[ id_snprintf ] = &IT.get("snprintf"); + KnownFunctionIDs[ id_vsnprintf ] = &IT.get("vsnprintf"); + KnownFunctionIDs[ id_asprintf ] = &IT.get("asprintf"); + KnownFunctionIDs[ id_vasprintf ] = &IT.get("vasprintf"); + KnownFunctionIDs[ id_vfprintf ] = &IT.get("vfprintf"); + KnownFunctionIDs[ id_vsprintf ] = &IT.get("vsprintf"); + KnownFunctionIDs[ id_vprintf ] = &IT.get("vprintf"); } //===----------------------------------------------------------------------===// diff --git a/Sema/Sema.h b/Sema/Sema.h index b6d47971f0..3cf75328c8 100644 --- a/Sema/Sema.h +++ b/Sema/Sema.h @@ -68,6 +68,28 @@ class Sema : public Action { /// us to associate a raw vector type with one of the OCU type names. /// This is only necessary for issuing pretty diagnostics. llvm::SmallVector OCUVectorDecls; + + // Enum values used by KnownFunctionIDs (see below). + enum { + id_printf, + id_fprintf, + id_sprintf, + id_snprintf, + id_vsnprintf, + id_asprintf, + id_vasprintf, + id_vfprintf, + id_vsprintf, + id_vprintf, + id_num_known_functions + }; + + /// KnownFunctionIDs - This is a list of IdentifierInfo objects to a set + /// of known functions used by the semantic analysis to do various + /// kinds of checking (e.g. checking format string errors in printf calls). + /// This list is populated upon the creation of a Sema object. + IdentifierInfo* KnownFunctionIDs[ id_num_known_functions ]; + public: Sema(Preprocessor &pp, ASTContext &ctxt, std::vector &prevInGroup); @@ -395,7 +417,17 @@ private: /// a constant expression of type int with a value greater than zero. If the /// array has an incomplete type or a valid constant size, return false, /// otherwise emit a diagnostic and return true. - bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc); + bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc); + + //===--------------------------------------------------------------------===// + // Extra semantic analysis beyond the C type system + private: + + void CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl, + Expr** Args, unsigned NumArgsInCall); + + void CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx, + Expr** Args, unsigned NumArgsInCall); }; diff --git a/Sema/SemaChecking.cpp b/Sema/SemaChecking.cpp new file mode 100644 index 0000000000..883cbb6e70 --- /dev/null +++ b/Sema/SemaChecking.cpp @@ -0,0 +1,90 @@ +//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by Ted Kremenek and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements extra semantic analysis beyond what is enforced +// by the C type system. +// +//===----------------------------------------------------------------------===// + +#include "Sema.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/Lex/Preprocessor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +using namespace clang; + +/// CheckFunctionCall - Check a direct function call for various correctness +/// and safety properties not strictly enforced by the C type system. +void +Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl, + Expr** Args, unsigned NumArgsInCall) { + + // Get the IdentifierInfo* for the called function. + IdentifierInfo *FnInfo = FDecl->getIdentifier(); + + // Search the KnownFunctionIDs for the identifier. + unsigned i = 0, e = id_num_known_functions; + for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; } + if( i == e ) return; + + // Printf checking. + if (i <= id_vprintf) { + // Retrieve the index of the format string parameter. + unsigned format_idx = 0; + switch (i) { + default: assert(false && "No format string argument index."); + case id_printf: format_idx = 0; break; + case id_fprintf: format_idx = 1; break; + case id_sprintf: format_idx = 1; break; + case id_snprintf: format_idx = 2; break; + case id_vsnprintf: format_idx = 2; break; + case id_asprintf: format_idx = 1; break; + case id_vasprintf: format_idx = 1; break; + case id_vfprintf: format_idx = 1; break; + case id_vsprintf: format_idx = 1; break; + case id_vprintf: format_idx = 1; break; + } + CheckPrintfArguments(Fn, FDecl, format_idx, Args, NumArgsInCall); + } +} + +/// CheckPrintfArguments - Check calls to printf (and similar functions) for +/// correct use of format strings. Improper format strings to functions in +/// the printf family can be the source of bizarre bugs and very serious +/// security holes. A good source of information is available in the following +/// paper (which includes additional references): +/// +/// FormatGuard: Automatic Protection From printf Format String +/// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001. +void +Sema::CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx, + Expr** Args, unsigned NumArgsInCall) { + + assert( format_idx < NumArgsInCall ); + + // CHECK: format string is not a string literal. + // + // Dynamically generated format strings are difficult to automatically + // vet at compile time. Requiring that format strings are string literals + // (1) permits the checking of format strings by the compiler and thereby + // (2) can practically remove the source of many format string exploits. + + StringLiteral *FExpr = dyn_cast(Args[format_idx]); + + if ( FExpr == NULL ) + Diag( Args[format_idx]->getLocStart(), + diag::warn_printf_not_string_constant, Fn->getSourceRange() ); +} \ No newline at end of file diff --git a/Sema/SemaExpr.cpp b/Sema/SemaExpr.cpp index 8a3576aaf3..6b871b4812 100644 --- a/Sema/SemaExpr.cpp +++ b/Sema/SemaExpr.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" using namespace clang; /// ParseStringLiteral - The specified tokens were lexed as pasted string @@ -555,6 +556,13 @@ ParseCallExpr(ExprTy *fn, SourceLocation LParenLoc, if (NumArgsInCall != NumArgsInProto && !proto->isVariadic()) return true; } + + // Do special checking on direct calls to functions. + if (ImplicitCastExpr *IcExpr = dyn_cast(Fn)) + if (DeclRefExpr *DRExpr = dyn_cast(IcExpr->getSubExpr())) + if (FunctionDecl *FDecl = dyn_cast(DRExpr->getDecl())) + CheckFunctionCall(Fn, FDecl, Args, NumArgsInCall); + return new CallExpr(Fn, Args, NumArgsInCall, resultType, RParenLoc); } diff --git a/clang.xcodeproj/project.pbxproj b/clang.xcodeproj/project.pbxproj index 9dd21df436..b7047ee1b1 100644 --- a/clang.xcodeproj/project.pbxproj +++ b/clang.xcodeproj/project.pbxproj @@ -120,6 +120,7 @@ DEF2E95F0C5FBD74000C4259 /* InternalsManual.html in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */; }; DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */; }; DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */; }; + DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */; }; F0226FD20C18084500141F42 /* TextDiagnosticPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */; }; F0226FD30C18084500141F42 /* TextDiagnosticPrinter.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */; }; /* End PBXBuildFile section */ @@ -307,6 +308,7 @@ DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text.html; name = InternalsManual.html; path = docs/InternalsManual.html; sourceTree = ""; }; DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtDumper.cpp; path = AST/StmtDumper.cpp; sourceTree = ""; }; DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGAggExpr.cpp; path = CodeGen/CGAggExpr.cpp; sourceTree = ""; }; + DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaChecking.cpp; path = Sema/SemaChecking.cpp; sourceTree = ""; }; F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnosticPrinter.cpp; path = Driver/TextDiagnosticPrinter.cpp; sourceTree = ""; }; F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnosticPrinter.h; path = Driver/TextDiagnosticPrinter.h; sourceTree = ""; }; /* End PBXFileReference section */ @@ -401,6 +403,7 @@ DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */, DE67E7140C020EDF00F66BC5 /* Sema.h */, DE67E7160C020EE400F66BC5 /* Sema.cpp */, + DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */, DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */, DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */, DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */, @@ -683,6 +686,7 @@ DEF2E9340C5FBA02000C4259 /* ASTStreamers.cpp in Sources */, DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */, DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */, + DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/include/clang/Basic/DiagnosticKinds.def b/include/clang/Basic/DiagnosticKinds.def index b6958540be..041f5537dd 100644 --- a/include/clang/Basic/DiagnosticKinds.def +++ b/include/clang/Basic/DiagnosticKinds.def @@ -658,6 +658,10 @@ DIAG(err_typecheck_choose_expr_requires_constant, ERROR, "'__builtin_choose_expr' requires a constant expression") DIAG(warn_unused_expr, WARNING, "expression result unused") + +// Printf checking +DIAG(warn_printf_not_string_constant, WARNING, + "format string is not a string literal") // Statements. DIAG(err_continue_not_in_loop, ERROR, @@ -690,4 +694,6 @@ DIAG(ext_return_missing_expr, EXTENSION, DIAG(ext_return_has_expr, EXTENSION, "void function '%0' should not return a value") + + #undef DIAG diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c new file mode 100644 index 0000000000..f71cd58645 --- /dev/null +++ b/test/Sema/format-strings.c @@ -0,0 +1,23 @@ +// RUN: clang -parse-ast-check %s + +#include +#include + +void check_string_literal( FILE* fp, const char* s, char *buf, ... ) { + + char * b; + va_list ap; + va_start(ap,buf); + + printf(s); // expected-warning {{format string is not a string literal}} + vprintf(s,ap); // expected-warning {{format string is not a string liter}} + fprintf(fp,s); // expected-warning {{format string is not a string literal}} + vfprintf(fp,s,ap); // expected-warning {{format string is not a string lit}} + asprintf(&b,s); // expected-warning {{format string is not a string lit}} + vasprintf(&b,s,ap); // expected-warning {{format string is not a string lit}} + sprintf(buf,s); // expected-warning {{format string is not a string literal}} + snprintf(buf,2,s); // expected-warning {{format string is not a string lit}} + vsprintf(buf,s,ap); // expected-warning {{format string is not a string lit}} + vsnprintf(buf,2,s,ap); // expected-warning {{mat string is not a string lit}} +} +