granicus.if.org Git - clang/commitdiff
Add the ability to output static analysis results to SARIF.
author Aaron Ballman <aaron@aaronballman.com>
Tue, 30 Oct 2018 18:55:38 +0000 (18:55 +0000)
committer Aaron Ballman <aaron@aaronballman.com>
Tue, 30 Oct 2018 18:55:38 +0000 (18:55 +0000)
This allows users to specify SARIF (https://github.com/oasis-tcs/sarif-spec) as the output from the clang static analyzer so that the results can be read in by other tools, such as extensions to Visual Studio and VSCode, as well as static analyzers like CodeSonar.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@345628 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/StaticAnalyzer/Core/Analyses.def
include/clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h
lib/StaticAnalyzer/Core/CMakeLists.txt
lib/StaticAnalyzer/Core/SarifDiagnostics.cpp [new file with mode: 0644]
test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif [new file with mode: 0644]
test/Analysis/diagnostics/sarif-diagnostics-taint-test.c [new file with mode: 0644]

index 281a2ac3a66fece762f5acc52d5a08661331aa8d..99e26c75e1c21613e9ccaeb00dd4f3d628cbf40f 100644 (file)
@@ -33,6 +33,7 @@ ANALYSIS_DIAGNOSTICS(HTML_SINGLE_FILE, "html-single-file", "Output analysis resu
 ANALYSIS_DIAGNOSTICS(PLIST, "plist", "Output analysis results using Plists", createPlistDiagnosticConsumer)
 ANALYSIS_DIAGNOSTICS(PLIST_MULTI_FILE, "plist-multi-file", "Output analysis results using Plists (allowing for multi-file bugs)", createPlistMultiFileDiagnosticConsumer)
 ANALYSIS_DIAGNOSTICS(PLIST_HTML, "plist-html", "Output analysis results using HTML wrapped with Plists", createPlistHTMLDiagnosticConsumer)
+ANALYSIS_DIAGNOSTICS(SARIF, "sarif", "Output analysis results in a SARIF file", createSarifDiagnosticConsumer)
 ANALYSIS_DIAGNOSTICS(TEXT, "text", "Text output of analysis results", createTextPathDiagnosticConsumer)
 
 #ifndef ANALYSIS_PURGE
index 7fff42903bc37ed908a3a31d700c8c946bebaf34..e9c682d7986cbb1c3b76e5864feeca1cc56b9128 100644 (file)
@@ -118,7 +118,7 @@ public:
     /// Only runs visitors, no output generated.
     None,
 
-    /// Used for HTML and text output.
+    /// Used for HTML, SARIF, and text output.
     Minimal,
 
     /// Used for plist output, used for "arrows" generation.
index 44310073a61f18294517968780aee2bddd5a51d2..17334d841e068c55554c337b19523e3afe684875 100644 (file)
@@ -45,12 +45,13 @@ add_clang_library(clangStaticAnalyzerCore
   RangedConstraintManager.cpp
   RegionStore.cpp
   RetainSummaryManager.cpp
-  SValBuilder.cpp
-  SVals.cpp
+  SarifDiagnostics.cpp
   SimpleConstraintManager.cpp
   SimpleSValBuilder.cpp
   Store.cpp
   SubEngine.cpp
+  SValBuilder.cpp
+  SVals.cpp
   SymbolManager.cpp
   TaintManager.cpp
   WorkList.cpp
diff --git a/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp b/lib/StaticAnalyzer/Core/SarifDiagnostics.cpp
new file mode 100644 (file)
index 0000000..a320883
--- /dev/null
@@ -0,0 +1,270 @@
+//===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the SarifDiagnostics object.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Version.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
+#include "clang/StaticAnalyzer/Core/BugReporter/PathDiagnostic.h"
+#include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace ento;
+
+namespace {
+/// Path diagnostic consumer that reports analyzer results as a SARIF document
+/// written to a single output file.
+class SarifDiagnostics : public PathDiagnosticConsumer {
+public:
+  /// \param Output name of the file the SARIF document is written to. The
+  /// analyzer options argument is accepted but not used.
+  SarifDiagnostics(AnalyzerOptions &, const std::string &Output)
+      : OutputFile(Output) {}
+  ~SarifDiagnostics() override = default;
+
+  StringRef getName() const override { return "SarifDiagnostics"; }
+
+  // Capabilities advertised to the path diagnostic machinery.
+  PathGenerationScheme getGenerationScheme() const override { return Minimal; }
+  bool supportsCrossFileDiagnostics() const override { return true; }
+  bool supportsLogicalOpControlFlow() const override { return true; }
+
+  /// Writes all of \p Diags to the output file; defined out of line.
+  void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
+                            FilesMade *FM) override;
+
+private:
+  /// Destination file name supplied at construction.
+  std::string OutputFile;
+};
+} // end anonymous namespace
+
+/// Creates a SARIF diagnostic consumer that writes to \p Output and appends it
+/// to \p C. The preprocessor argument is unused.
+// NOTE(review): the consumer is heap-allocated and stored as a raw pointer;
+// presumably whoever owns \p C is responsible for deleting it -- confirm.
+void ento::createSarifDiagnosticConsumer(AnalyzerOptions &AnalyzerOpts,
+                                         PathDiagnosticConsumers &C,
+                                         const std::string &Output,
+                                         const Preprocessor &) {
+  auto *Consumer = new SarifDiagnostics(AnalyzerOpts, Output);
+  C.push_back(Consumer);
+}
+
+/// Returns the name to report for \p FE: its real path name when one is
+/// available, otherwise the name stored in the file entry.
+static StringRef getFileName(const FileEntry &FE) {
+  StringRef RealPath = FE.tryGetRealPathName();
+  return RealPath.empty() ? FE.getName() : RealPath;
+}
+
+/// Returns the text to emit for a single character of a URI path component:
+/// the character itself when it may be written literally, otherwise '%'
+/// followed by the hexadecimal form of the character.
+static std::string percentEncodeURICharacter(char C) {
+  // Per RFC 3986, alphanumerics and the punctuation listed below are not
+  // reserved within the path component, so they are written out unchanged.
+  // Everything else is percent-encoded.
+  const bool IsLiteral =
+      llvm::isAlnum(C) ||
+      StringRef("-._~:@!$&'()*+,;=").find(C) != StringRef::npos;
+  if (!IsLiteral)
+    return "%" + llvm::toHex(StringRef(&C, 1));
+  return std::string(&C, 1);
+}
+
+/// Converts a file system path into a "file://" URI.
+///
+/// A root name beginning with "//" is treated as the URI authority. Any other
+/// non-empty root name (for example a drive letter) becomes the first segment
+/// of the URI path. Every path component after the first is then appended,
+/// preceded by '/' and percent-encoded character by character.
+///
+/// \param Filename the path to convert.
+/// \returns the URI as a string.
+// NOTE(review): the first path component is always skipped, so a relative
+// path would lose its leading directory; callers appear to pass absolute
+// paths -- confirm.
+static std::string fileNameToURI(StringRef Filename) {
+  llvm::SmallString<32> Ret = "file://";
+
+  // Get the root name to see if it has a URI authority.
+  StringRef Root = sys::path::root_name(Filename);
+  if (Root.startswith("//")) {
+    // There is an authority, so add it to the URI.
+    Ret += Root.drop_front(2);
+  } else if (!Root.empty()) {
+    // There is no authority, so end the (empty) authority component and add
+    // the root to the URI. When there is no root name at all, nothing is added
+    // here: the separator emitted before the first remaining component already
+    // terminates the authority, and adding another "/" would produce
+    // "file:////...".
+    Ret += "/";
+    Ret += Root;
+  }
+
+  // Skip the first path component, which was handled above. Guard the
+  // increment so an empty path does not advance past the end iterator.
+  auto Iter = sys::path::begin(Filename);
+  const auto End = sys::path::end(Filename);
+  if (Iter != End)
+    ++Iter;
+
+  // Add the rest of the path components, encoding any reserved characters.
+  for (; Iter != End; ++Iter) {
+    StringRef Component = *Iter;
+
+    // With Windows native paths the backslash following the drive component
+    // may show up as a component of its own; it is not a URI path part.
+    if (Component == "\\")
+      continue;
+
+    // Add the separator between the previous path part and this one.
+    Ret += "/";
+
+    // URI encode the part.
+    for (char C : Component)
+      Ret += percentEncodeURICharacter(C);
+  }
+
+  return Ret.str().str();
+}
+
+/// Builds a SARIF "fileLocation" object holding the URI of \p FE.
+static json::Object createFileLocation(const FileEntry &FE) {
+  std::string URI = fileNameToURI(getFileName(FE));
+  return json::Object{{"uri", std::move(URI)}};
+}
+
+/// Builds the SARIF "files" entry for \p FE: its location, the "resultFile"
+/// role, its size, and a fixed "text/plain" MIME type.
+static json::Object createFile(const FileEntry &FE) {
+  json::Object Location = createFileLocation(FE);
+  json::Array Roles{"resultFile"};
+  return json::Object{{"fileLocation", std::move(Location)},
+                      {"roles", std::move(Roles)},
+                      {"length", FE.getSize()},
+                      {"mimeType", "text/plain"}};
+}
+
+/// Builds a SARIF "fileLocation" object for \p FE and, as a side effect, adds
+/// an entry for the file to \p Files (keyed by URI) if none exists yet.
+static json::Object createFileLocation(const FileEntry &FE,
+                                       json::Object &Files) {
+  std::string FileURI = fileNameToURI(getFileName(FE));
+
+  // Only describe each file once, no matter how many locations refer to it.
+  const bool AlreadyListed = Files.get(FileURI) != nullptr;
+  if (!AlreadyListed)
+    Files[FileURI] = createFile(FE);
+
+  return json::Object{{"uri", FileURI}};
+}
+
+/// Builds a SARIF "region" object from the expansion line and column numbers
+/// that \p SM reports for the begin and end of \p R.
+// NOTE(review): endColumn is the column of R.getEnd() as-is; the SARIF
+// specification may expect the column following the last character of the
+// region -- confirm against the spec.
+static json::Object createTextRegion(SourceRange R, const SourceManager &SM) {
+  const SourceLocation Begin = R.getBegin();
+  const SourceLocation End = R.getEnd();
+  return json::Object{{"startLine", SM.getExpansionLineNumber(Begin)},
+                      {"endLine", SM.getExpansionLineNumber(End)},
+                      {"startColumn", SM.getExpansionColumnNumber(Begin)},
+                      {"endColumn", SM.getExpansionColumnNumber(End)}};
+}
+
+/// Builds a SARIF "physicalLocation" object: the file \p FE plus the text
+/// region \p R within it. The file is recorded in \p Files as a side effect.
+static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE,
+                                           const SourceManager &SMgr,
+                                           json::Object &Files) {
+  json::Object FileLoc = createFileLocation(FE, Files);
+  json::Object Region = createTextRegion(R, SMgr);
+  return json::Object{{"fileLocation", std::move(FileLoc)},
+                      {"region", std::move(Region)}};
+}
+
+/// Importance levels attached to each thread flow location; importanceToStr
+/// maps them to the strings written into the SARIF output.
+enum class Importance { Important, Essential, Unimportant };
+
+/// Returns the string written to the SARIF "importance" property for \p I.
+static StringRef importanceToStr(Importance I) {
+  // Every enumerator is handled explicitly and there is no default case.
+  switch (I) {
+  case Importance::Important:
+    return "important";
+  case Importance::Essential:
+    return "essential";
+  case Importance::Unimportant:
+    return "unimportant";
+  }
+  // Only reached if I holds a value that is not one of the enumerators.
+  llvm_unreachable("Fully covered switch is not so fully covered");
+}
+
+/// Builds one SARIF "threadFlowLocation": the step number \p Step, the
+/// location (moved from \p Location), and the importance \p I as a string.
+static json::Object createThreadFlowLocation(int Step, json::Object &&Location,
+                                             Importance I) {
+  json::Object FlowLocation{{"step", Step},
+                            {"location", std::move(Location)},
+                            {"importance", importanceToStr(I)}};
+  return FlowLocation;
+}
+
+/// Builds a SARIF "message" object whose "text" property is a copy of \p Text.
+static json::Object createMessage(StringRef Text) {
+  std::string Owned = Text.str();
+  return json::Object{{"text", std::move(Owned)}};
+}
+
+/// Builds a SARIF "location" object around \p PhysicalLocation. A "message"
+/// property is added only when \p Message is non-empty.
+static json::Object createLocation(json::Object &&PhysicalLocation,
+                                   StringRef Message = "") {
+  json::Object Loc{{"physicalLocation", std::move(PhysicalLocation)}};
+  if (Message.empty())
+    return Loc;
+
+  Loc.insert({"message", createMessage(Message)});
+  return Loc;
+}
+
+/// Maps a path diagnostic piece to the importance reported for it.
+///
+/// Event pieces tagged "ConditionBRVisitor" are Important and all other event
+/// pieces are Essential. Control flow, call, macro and note pieces are all
+/// reported as Unimportant.
+static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
+  switch (Piece.getKind()) {
+  case PathDiagnosticPiece::Kind::Call:
+  case PathDiagnosticPiece::Kind::Macro:
+  case PathDiagnosticPiece::Kind::Note:
+    // FIXME: What should be reported here?
+    break;
+  case PathDiagnosticPiece::Kind::Event:
+    return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
+                                                     : Importance::Essential;
+  case PathDiagnosticPiece::Kind::ControlFlow:
+    return Importance::Unimportant;
+  }
+  // Fallback for the kinds that break out of the switch above.
+  return Importance::Unimportant;
+}
+
+/// Builds a SARIF "threadFlow" object with one location per piece of
+/// \p Pieces, numbered from 1 in iteration order. Every file referenced by a
+/// piece is recorded in \p Files as a side effect.
+///
+/// The source manager is taken from each piece's own location rather than
+/// from the first piece, so an empty \p Pieces yields an empty "locations"
+/// array instead of calling front() on an empty container.
+// NOTE(review): getFileEntry() is dereferenced unconditionally; this assumes
+// every piece is located in a real file -- confirm.
+static json::Object createThreadFlow(const PathPieces &Pieces,
+                                     json::Object &Files) {
+  int Step = 1;
+  json::Array Locations;
+  for (const auto &Piece : Pieces) {
+    const PathDiagnosticLocation &P = Piece->getLocation();
+    json::Object PhysLoc = createPhysicalLocation(
+        P.asRange(), *P.asLocation().getFileEntry(), P.getManager(), Files);
+    Locations.push_back(createThreadFlowLocation(
+        Step++, createLocation(std::move(PhysLoc), Piece->getString()),
+        calculateImportance(*Piece)));
+  }
+  return json::Object{{"locations", std::move(Locations)}};
+}
+
+/// Builds a SARIF "codeFlow" object containing a single thread flow made from
+/// \p Pieces.
+static json::Object createCodeFlow(const PathPieces &Pieces,
+                                   json::Object &Files) {
+  json::Array ThreadFlows{createThreadFlow(Pieces, Files)};
+  return json::Object{{"threadFlows", std::move(ThreadFlows)}};
+}
+
+/// Builds the SARIF "tool" object: fixed name, full name and language strings
+/// plus the version string returned by getClangFullVersion().
+static json::Object createTool() {
+  std::string Version = getClangFullVersion();
+  return json::Object{{"name", "clang"},
+                      {"fullName", "clang static analyzer"},
+                      {"language", "en-US"},
+                      {"version", std::move(Version)}};
+}
+
+/// Builds the SARIF "result" object for \p Diag: its verbose description as
+/// the message, one code flow built from the flattened path, the diagnostic's
+/// own location, and the check name as the rule id. Files referenced along
+/// the way are recorded in \p Files as a side effect.
+///
+/// The source manager is taken from the diagnostic's own location rather than
+/// from the first path piece, so this function does not call front() on the
+/// flattened path.
+// NOTE(review): getFileEntry() is dereferenced unconditionally; this assumes
+// the diagnostic is located in a real file -- confirm.
+static json::Object createResult(const PathDiagnostic &Diag,
+                                 json::Object &Files) {
+  const PathPieces &Path = Diag.path.flatten(false);
+  const PathDiagnosticLocation DiagLoc = Diag.getLocation();
+  const SourceManager &SMgr = DiagLoc.getManager();
+
+  return json::Object{
+      {"message", createMessage(Diag.getVerboseDescription())},
+      {"codeFlows", json::Array{createCodeFlow(Path, Files)}},
+      {"locations",
+       json::Array{createLocation(createPhysicalLocation(
+           DiagLoc.asRange(), *DiagLoc.asLocation().getFileEntry(), SMgr,
+           Files))}},
+      {"ruleId", Diag.getCheckName()}};
+}
+
+/// Builds a SARIF "run" object: the tool description, one result per entry of
+/// \p Diags, and the table of files those results refer to.
+static json::Object createRun(std::vector<const PathDiagnostic *> &Diags) {
+  json::Array Results;
+  json::Object Files;
+
+  // createResult fills in Files as it goes, so the file table is complete
+  // once every diagnostic has been converted.
+  for (const PathDiagnostic *D : Diags)
+    Results.push_back(createResult(*D, Files));
+
+  return json::Object{{"tool", createTool()},
+                      {"results", std::move(Results)},
+                      {"files", std::move(Files)}};
+}
+
+/// Serializes \p Diags as a SARIF document containing a single run and writes
+/// it to the output file given at construction. If the file cannot be opened,
+/// a warning is printed to the error stream and nothing is written.
+void SarifDiagnostics::FlushDiagnosticsImpl(
+    std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
+  // An existing file is currently overwritten. Appending a run to an existing
+  // SARIF file might be a useful feature someday, but such files can grow
+  // large very quickly, so decoding one into JSON just to append a run may be
+  // an expensive operation.
+  std::error_code OpenError;
+  llvm::raw_fd_ostream Out(OutputFile, OpenError, llvm::sys::fs::F_Text);
+  if (OpenError) {
+    llvm::errs() << "warning: could not create file: " << OpenError.message()
+                 << '\n';
+    return;
+  }
+
+  json::Array Runs{createRun(Diags)};
+  json::Object Document{{"$schema", "http://json.schemastore.org/sarif-2.0.0"},
+                        {"version", "2.0.0-beta.2018-09-26"},
+                        {"runs", std::move(Runs)}};
+  Out << llvm::formatv("{0:2}", json::Value(std::move(Document)));
+}
diff --git a/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif b/test/Analysis/diagnostics/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
new file mode 100644 (file)
index 0000000..965bcb9
--- /dev/null
@@ -0,0 +1,99 @@
+{
+  "$schema": "http://json.schemastore.org/sarif-2.0.0",
+  "runs": [
+    {
+      "files": {
+        "file:sarif-diagnostics-taint-test.c": {
+          "fileLocation": {
+            "uri": "file:sarif-diagnostics-taint-test.c"
+          },
+          "length": 500,
+          "mimeType": "text/plain",
+          "roles": [
+            "resultFile"
+          ]
+        }
+      },
+      "results": [
+        {
+          "codeFlows": [
+            {
+              "threadFlows": [
+                {
+                  "locations": [
+                    {
+                      "importance": "essential",
+                      "location": {
+                        "message": {
+                          "text": "Calling 'f'"
+                        },
+                        "physicalLocation": {
+                          "fileLocation": {
+                            "uri": "file:sarif-diagnostics-taint-test.c"
+                          },
+                          "region": {
+                            "endColumn": 5,
+                            "endLine": 13,
+                            "startColumn": 3,
+                            "startLine": 13
+                          }
+                        }
+                      },
+                      "step": 1
+                    },
+                    {
+                      "importance": "essential",
+                      "location": {
+                        "message": {
+                          "text": "tainted"
+                        },
+                        "physicalLocation": {
+                          "fileLocation": {
+                            "uri": "file:sarif-diagnostics-taint-test.c"
+                          },
+                          "region": {
+                            "endColumn": 17,
+                            "endLine": 9,
+                            "startColumn": 11,
+                            "startLine": 9
+                          }
+                        }
+                      },
+                      "step": 2
+                    }
+                  ]
+                }
+              ]
+            }
+          ],
+          "locations": [
+            {
+              "physicalLocation": {
+                "fileLocation": {
+                  "uri": "file:sarif-diagnostics-taint-test.c"
+                },
+                "region": {
+                  "endColumn": 17,
+                  "endLine": 9,
+                  "startColumn": 11,
+                  "startLine": 9
+                }
+              }
+            }
+          ],
+          "message": {
+            "text": "tainted"
+          },
+          "ruleId": "debug.TaintTest"
+        }
+      ],
+      "tool": {
+        "fullName": "clang static analyzer",
+        "language": "en-US",
+        "name": "clang",
+        "version": "clang version 8.0.0 (https://github.com/llvm-project/clang.git a5ccb257a7a70928ede717a7c282f5fc8cbed310) (https://github.com/llvm-mirror/llvm.git 73cebd79c512f7129eca16b0f3a7abd21d2881e8)"
+      }
+    }
+  ],
+  "version": "2.0.0-beta.2018-09-26"
+}
diff --git a/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c b/test/Analysis/diagnostics/sarif-diagnostics-taint-test.c
new file mode 100644 (file)
index 0000000..63b6c12
--- /dev/null
@@ -0,0 +1,15 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.security.taint,debug.TaintTest %s -verify -analyzer-output=sarif -o - | diff -u1 -w -I ".*file:.*sarif-diagnostics-taint-test.c" -I "clang version" -I "2\.0\.0\-beta\." - %S/Inputs/expected-sarif/sarif-diagnostics-taint-test.c.sarif
+#include "../Inputs/system-header-simulator.h"
+
+int atoi(const char *nptr);
+
+void f(void) {
+  char s[80];
+  scanf("%s", s);
+  int d = atoi(s); // expected-warning {{tainted}}
+}
+
+int main(void) {
+  f();
+  return 0;
+}