]> granicus.if.org Git - clang/commitdiff
initial support for checking format strings, patch by Ted Kremenek:
authorChris Lattner <sabre@nondot.org>
Fri, 10 Aug 2007 20:18:51 +0000 (20:18 +0000)
committerChris Lattner <sabre@nondot.org>
Fri, 10 Aug 2007 20:18:51 +0000 (20:18 +0000)
"I've coded up some support in clang to flag warnings for non-constant format strings used in calls to printf-like functions (all the functions listed in "man fprintf").  Non-constant format strings are a source of many security exploits in C/C++ programs, and I believe are currently detected by gcc using the flag -Wformat-nonliteral."

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@41003 91177308-0d34-0410-b5e6-96231b3b80d8

Sema/Sema.cpp
Sema/Sema.h
Sema/SemaChecking.cpp [new file with mode: 0644]
Sema/SemaExpr.cpp
clang.xcodeproj/project.pbxproj
include/clang/Basic/DiagnosticKinds.def
test/Sema/format-strings.c [new file with mode: 0644]

index 64c18a80c0e41f34e98ca2de0cc8c7b349689715..76771d0d5c92ad3f2053e5da178c74f8a9173812 100644 (file)
 #include "clang/AST/ASTContext.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Basic/Diagnostic.h"
+
 using namespace clang;
 
 Sema::Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup)
   : PP(pp), Context(ctxt), CurFunctionDecl(0), LastInGroupList(prevInGroup) {
+  
+  // Get IdentifierInfo objects for known functions for which we
+  // do extra checking.  
+  IdentifierTable& IT = PP.getIdentifierTable();  
+
+  KnownFunctionIDs[ id_printf  ] = &IT.get("printf");
+  KnownFunctionIDs[ id_fprintf ] = &IT.get("fprintf");
+  KnownFunctionIDs[ id_sprintf ] = &IT.get("sprintf");
+  KnownFunctionIDs[ id_snprintf ] = &IT.get("snprintf");
+  KnownFunctionIDs[ id_vsnprintf ] = &IT.get("vsnprintf");
+  KnownFunctionIDs[ id_asprintf ] = &IT.get("asprintf");
+  KnownFunctionIDs[ id_vasprintf ] = &IT.get("vasprintf");
+  KnownFunctionIDs[ id_vfprintf ] = &IT.get("vfprintf");
+  KnownFunctionIDs[ id_vsprintf ] = &IT.get("vsprintf");
+  KnownFunctionIDs[ id_vprintf ] = &IT.get("vprintf");
 }
 
 //===----------------------------------------------------------------------===//
index b6d47971f0b5baa2de0039c88835b06a26c1d3d3..3cf75328c85d4163f2208de4d5793976ca46a910 100644 (file)
@@ -68,6 +68,28 @@ class Sema : public Action {
   /// us to associate a raw vector type with one of the OCU type names.
   /// This is only necessary for issuing pretty diagnostics.
   llvm::SmallVector<TypedefDecl*, 24> OCUVectorDecls;
+
+  // Enum values used by KnownFunctionIDs (see below).
+  enum {
+    id_printf,
+    id_fprintf,
+    id_sprintf,
+    id_snprintf,
+    id_vsnprintf,
+    id_asprintf,
+    id_vasprintf,
+    id_vfprintf,
+    id_vsprintf,
+    id_vprintf,
+    id_num_known_functions
+  };
+  
+  /// KnownFunctionIDs - This is a list of IdentifierInfo objects to a set
+  /// of known functions used by the semantic analysis to do various
+  /// kinds of checking (e.g. checking format string errors in printf calls).
+  /// This list is populated upon the creation of a Sema object.    
+  IdentifierInfo* KnownFunctionIDs[ id_num_known_functions ];
+  
 public:
   Sema(Preprocessor &pp, ASTContext &ctxt, std::vector<Decl*> &prevInGroup);
   
@@ -395,7 +417,17 @@ private:
   /// a constant expression of type int with a value greater than zero.  If the
   /// array has an incomplete type or a valid constant size, return false,
   /// otherwise emit a diagnostic and return true.
-  bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc); 
+  bool VerifyConstantArrayType(const ArrayType *ary, SourceLocation loc);
+  
+  //===--------------------------------------------------------------------===//
+  // Extra semantic analysis beyond the C type system
+  private:
+  
+  void CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
+                         Expr** Args, unsigned NumArgsInCall);
+
+  void CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
+                            Expr** Args, unsigned NumArgsInCall);
 };
 
 
diff --git a/Sema/SemaChecking.cpp b/Sema/SemaChecking.cpp
new file mode 100644 (file)
index 0000000..883cbb6
--- /dev/null
@@ -0,0 +1,90 @@
+//===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file was developed by Ted Kremenek and is distributed under
+// the University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file implements extra semantic analysis beyond what is enforced 
+//  by the C type system.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Sema.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/TargetInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace clang;
+
+/// CheckFunctionCall - Check a direct function call for various correctness
+/// and safety properties not strictly enforced by the C type system.
+void
+Sema::CheckFunctionCall(Expr *Fn, FunctionDecl *FDecl,
+                        Expr** Args, unsigned NumArgsInCall) {
+                        
+  // Get the IdentifierInfo* for the called function.
+  IdentifierInfo *FnInfo = FDecl->getIdentifier();
+  
+  // Search the KnownFunctionIDs for the identifier.
+  unsigned i = 0, e = id_num_known_functions;
+  for ( ; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
+  if( i == e ) return;
+  
+  // Printf checking.
+  if (i <= id_vprintf) {
+    // Retrieve the index of the format string parameter.
+    unsigned format_idx = 0;
+    switch (i) {
+      default: assert(false && "No format string argument index.");
+      case id_printf:    format_idx = 0; break;
+      case id_fprintf:   format_idx = 1; break;
+      case id_sprintf:   format_idx = 1; break;
+      case id_snprintf:  format_idx = 2; break;
+      case id_vsnprintf: format_idx = 2; break;
+      case id_asprintf:  format_idx = 1; break;
+      case id_vasprintf: format_idx = 1; break;
+      case id_vfprintf:  format_idx = 1; break;
+      case id_vsprintf:  format_idx = 1; break;
+      case id_vprintf:   format_idx = 1; break;
+    }    
+    CheckPrintfArguments(Fn, FDecl, format_idx, Args, NumArgsInCall);
+  }
+}
+
+/// CheckPrintfArguments - Check calls to printf (and similar functions) for
+/// correct use of format strings.  Improper format strings to functions in
+/// the printf family can be the source of bizarre bugs and very serious
+/// security holes.  A good source of information is available in the following
+/// paper (which includes additional references):
+///
+///  FormatGuard: Automatic Protection From printf Format String
+///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
+void
+Sema::CheckPrintfArguments(Expr *Fn, FunctionDecl *FDecl, unsigned format_idx,
+                           Expr** Args, unsigned NumArgsInCall) {
+                           
+  assert( format_idx < NumArgsInCall );
+
+  // CHECK: format string is not a string literal.
+  // 
+  // Dynamically generated format strings are difficult to automatically
+  // vet at compile time.  Requiring that format strings are string literals
+  // (1) permits the checking of format strings by the compiler and thereby
+  // (2) can practically remove the source of many format string exploits.
+
+  StringLiteral *FExpr = dyn_cast<StringLiteral>(Args[format_idx]);
+  
+  if ( FExpr == NULL )
+    Diag( Args[format_idx]->getLocStart(), 
+          diag::warn_printf_not_string_constant, Fn->getSourceRange() );
+}
\ No newline at end of file
index 8a3576aaf3a242e0ead26add7146cf770672c627..6b871b4812ba70c4958f07d0377bb6b2a61c47fa 100644 (file)
@@ -22,6 +22,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 using namespace clang;
 
 /// ParseStringLiteral - The specified tokens were lexed as pasted string
@@ -555,6 +556,13 @@ ParseCallExpr(ExprTy *fn, SourceLocation LParenLoc,
     if (NumArgsInCall != NumArgsInProto && !proto->isVariadic())
       return true;
   }
+  
+  // Do special checking on direct calls to functions.
+  if (ImplicitCastExpr *IcExpr = dyn_cast<ImplicitCastExpr>(Fn))
+    if (DeclRefExpr *DRExpr = dyn_cast<DeclRefExpr>(IcExpr->getSubExpr()))
+      if (FunctionDecl *FDecl = dyn_cast<FunctionDecl>(DRExpr->getDecl()))
+        CheckFunctionCall(Fn, FDecl, Args, NumArgsInCall);
+
   return new CallExpr(Fn, Args, NumArgsInCall, resultType, RParenLoc);
 }
 
index 9dd21df436a54664bd0c4004d2f8cdd0b52fe974..b7047ee1b1ce1fb24a292762b3dc61cb64de59d6 100644 (file)
                DEF2E95F0C5FBD74000C4259 /* InternalsManual.html in CopyFiles */ = {isa = PBXBuildFile; fileRef = DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */; };
                DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */; };
                DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */; };
+               DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */; };
                F0226FD20C18084500141F42 /* TextDiagnosticPrinter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */; };
                F0226FD30C18084500141F42 /* TextDiagnosticPrinter.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */; };
 /* End PBXBuildFile section */
                DEF2E95E0C5FBD74000C4259 /* InternalsManual.html */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = text.html; name = InternalsManual.html; path = docs/InternalsManual.html; sourceTree = "<group>"; };
                DEF2EDA60C6A4252000C4259 /* StmtDumper.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = StmtDumper.cpp; path = AST/StmtDumper.cpp; sourceTree = "<group>"; };
                DEF2EFF20C6CDD74000C4259 /* CGAggExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGAggExpr.cpp; path = CodeGen/CGAggExpr.cpp; sourceTree = "<group>"; };
+               DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = SemaChecking.cpp; path = Sema/SemaChecking.cpp; sourceTree = "<group>"; };
                F0226FD00C18084500141F42 /* TextDiagnosticPrinter.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TextDiagnosticPrinter.cpp; path = Driver/TextDiagnosticPrinter.cpp; sourceTree = "<group>"; };
                F0226FD10C18084500141F42 /* TextDiagnosticPrinter.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TextDiagnosticPrinter.h; path = Driver/TextDiagnosticPrinter.h; sourceTree = "<group>"; };
 /* End PBXFileReference section */
                                DE67E7190C020F4F00F66BC5 /* ASTStreamer.cpp */,
                                DE67E7140C020EDF00F66BC5 /* Sema.h */,
                                DE67E7160C020EE400F66BC5 /* Sema.cpp */,
+                               DEF2F00F0C6CFED5000C4259 /* SemaChecking.cpp */,
                                DE67E7120C020ED900F66BC5 /* SemaDecl.cpp */,
                                DE67E7100C020ED400F66BC5 /* SemaExpr.cpp */,
                                DE67E70E0C020ECF00F66BC5 /* SemaExprCXX.cpp */,
                                DEF2E9340C5FBA02000C4259 /* ASTStreamers.cpp in Sources */,
                                DEF2EDA70C6A4252000C4259 /* StmtDumper.cpp in Sources */,
                                DEF2EFF30C6CDD74000C4259 /* CGAggExpr.cpp in Sources */,
+                               DEF2F0100C6CFED5000C4259 /* SemaChecking.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index b6958540be9cb5e7d69bfaa32d751146ddf4e06e..041f5537dda5919c66db2842a9f34e80e0861c27 100644 (file)
@@ -658,6 +658,10 @@ DIAG(err_typecheck_choose_expr_requires_constant, ERROR,
      "'__builtin_choose_expr' requires a constant expression")
 DIAG(warn_unused_expr, WARNING,
      "expression result unused")
+     
+// Printf checking
+DIAG(warn_printf_not_string_constant, WARNING,
+     "format string is not a string literal")
 
 // Statements.
 DIAG(err_continue_not_in_loop, ERROR,
@@ -690,4 +694,6 @@ DIAG(ext_return_missing_expr, EXTENSION,
 DIAG(ext_return_has_expr, EXTENSION,
      "void function '%0' should not return a value")
 
+
+
 #undef DIAG
diff --git a/test/Sema/format-strings.c b/test/Sema/format-strings.c
new file mode 100644 (file)
index 0000000..f71cd58
--- /dev/null
@@ -0,0 +1,23 @@
+// RUN: clang -parse-ast-check %s
+
+#include <stdio.h>
+#include <stdarg.h>
+
+void check_string_literal( FILE* fp, const char* s, char *buf, ... ) {
+
+  char * b;
+  va_list ap;
+  va_start(ap,buf);
+
+  printf(s); // expected-warning {{format string is not a string literal}}
+  vprintf(s,ap); // expected-warning {{format string is not a string liter}}
+  fprintf(fp,s); // expected-warning {{format string is not a string literal}}
+  vfprintf(fp,s,ap); // expected-warning {{format string is not a string lit}}
+  asprintf(&b,s); // expected-warning {{format string is not a string lit}}
+  vasprintf(&b,s,ap); // expected-warning {{format string is not a string lit}}
+  sprintf(buf,s); // expected-warning {{format string is not a string literal}}
+  snprintf(buf,2,s); // expected-warning {{format string is not a string lit}}
+  vsprintf(buf,s,ap); // expected-warning {{format string is not a string lit}}
+  vsnprintf(buf,2,s,ap); // expected-warning {{mat string is not a string lit}}
+}
+