[analyzer] Clean up modeling of strcmp, including cases where a string literal has...

author Jordy Rose <jediknil@belkadan.com>

Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)

committer Jordy Rose <jediknil@belkadan.com>

Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)
author Jordy Rose <jediknil@belkadan.com>
Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)
committer Jordy Rose <jediknil@belkadan.com>
Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)
diff --git a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp

index 2ea6e0643b69dc4198972081408973b911789397..84c201026dd7a0fcefa87f3c1e53a64e405853f4 100644 (file)
--- a/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -1386,24 +1386,24 @@ void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
  }
  
  void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
-  //int strcmp(const char *restrict s1, const char *restrict s2);
+  //int strcmp(const char *s1, const char *s2);
    evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
  }
  
  void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
-  //int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
+  //int strncmp(const char *s1, const char *s2, size_t n);
    evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
  }
  
  void CStringChecker::evalStrcasecmp(CheckerContext &C, 
                                      const CallExpr *CE) const {
-  //int strcasecmp(const char *restrict s1, const char *restrict s2);
+  //int strcasecmp(const char *s1, const char *s2);
    evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
  }
  
  void CStringChecker::evalStrncasecmp(CheckerContext &C, 
                                       const CallExpr *CE) const {
-  //int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
+  //int strncasecmp(const char *s1, const char *s2, size_t n);
    evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
  }
  
@@ -1435,52 +1435,96 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
    if (s2Length.isUndef())
      return;
  
-  // Get the string literal of the first string.
-  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
-  if (!s1StrLiteral)
-    return;
-  llvm::StringRef s1StrRef = s1StrLiteral->getString();
+  // If we know the two buffers are the same, we know the result is 0.
+  // First, get the two buffers' addresses. Another checker will have already
+  // made sure they're not undefined.
+  DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
+  DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
+
+  // See if they are the same.
+  SValBuilder &svalBuilder = C.getSValBuilder();
+  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
+  const GRState *StSameBuf, *StNotSameBuf;
+  llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
+
+  // If the two arguments might be the same buffer, we know the result is 0,
+  // and we only need to check one size.
+  if (StSameBuf) {
+    StSameBuf = StSameBuf->BindExpr(CE, svalBuilder.makeZeroVal(CE->getType()));
+    C.addTransition(StSameBuf);
+
+    // If the two arguments are GUARANTEED to be the same, we're done!
+    if (!StNotSameBuf)
+      return;
+  }
+
+  assert(StNotSameBuf);
+  state = StNotSameBuf;
  
-  // Get the string literal of the second string.
+  // At this point we can go about comparing the two buffers.
+  // For now, we only do this if they're both known string literals.
+
+  // Attempt to extract string literals from both expressions.
+  const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
    const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
-  if (!s2StrLiteral)
-    return;
-  llvm::StringRef s2StrRef = s2StrLiteral->getString();
+  bool canComputeResult = false;
+
+  if (s1StrLiteral && s2StrLiteral) {
+    llvm::StringRef s1StrRef = s1StrLiteral->getString();
+    llvm::StringRef s2StrRef = s2StrLiteral->getString();
+
+    if (isBounded) {
+      // Get the max number of characters to compare.
+      const Expr *lenExpr = CE->getArg(2);
+      SVal lenVal = state->getSVal(lenExpr);
+
+      // If the length is known, we can get the right substrings.
+      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
+        // Create substrings of each to compare the prefix.
+        s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
+        s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
+        canComputeResult = true;
+      }
+    } else {
+      // This is a normal, unbounded strcmp.
+      canComputeResult = true;
+    }
  
-  int result;
-  if (isBounded) {
-    // Get the max number of characters to compare.
-    const Expr *lenExpr = CE->getArg(2);
-    SVal lenVal = state->getSVal(lenExpr);
+    if (canComputeResult) {
+      // Real strcmp stops at null characters.
+      size_t s1Term = s1StrRef.find('\0');
+      if (s1Term != llvm::StringRef::npos)
+        s1StrRef = s1StrRef.substr(0, s1Term);
  
-    // Dynamically cast the length to a ConcreteInt. If it is not a ConcreteInt
-    // then give up, otherwise get the value and use it as the bounds.
-    nonloc::ConcreteInt *CI = dyn_cast<nonloc::ConcreteInt>(&lenVal);
-    if (!CI)
-      return;
-    llvm::APSInt lenInt(CI->getValue());
+      size_t s2Term = s2StrRef.find('\0');
+      if (s2Term != llvm::StringRef::npos)
+        s2StrRef = s2StrRef.substr(0, s2Term);
+
+      // Use StringRef's comparison methods to compute the actual result.
+      int result;
+
+      if (ignoreCase) {
+        // Compare string 1 to string 2 the same way strcasecmp() does.
+        result = s1StrRef.compare_lower(s2StrRef);
+      } else {
+        // Compare string 1 to string 2 the same way strcmp() does.
+        result = s1StrRef.compare(s2StrRef);
+      }
  
-    // Create substrings of each to compare the prefix.
-    s1StrRef = s1StrRef.substr(0, (size_t)lenInt.getLimitedValue());
-    s2StrRef = s2StrRef.substr(0, (size_t)lenInt.getLimitedValue());
+      // Build the SVal of the comparison and bind the return value.
+      SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
+      state = state->BindExpr(CE, resultVal);
+    }
    }
  
-  if (ignoreCase) {
-    // Compare string 1 to string 2 the same way strcasecmp() does.
-    result = s1StrRef.compare_lower(s2StrRef);
-  } else {
-    // Compare string 1 to string 2 the same way strcmp() does.
-    result = s1StrRef.compare(s2StrRef);
+  if (!canComputeResult) {
+    // Conjure a symbolic value. It's the best we can do.
+    unsigned Count = C.getNodeBuilder().getCurrentBlockCount();
+    SVal resultVal = svalBuilder.getConjuredSymbolVal(NULL, CE, Count);
+    state = state->BindExpr(CE, resultVal);
    }
-  
-  // Build the SVal of the comparison to bind the return value.
-  SValBuilder &svalBuilder = C.getSValBuilder();
-  QualType intTy = svalBuilder.getContext().IntTy;
-  SVal resultVal = svalBuilder.makeIntVal(result, intTy);
  
-  // Bind the return value of the expression.
-  // Set the return value.
-  state = state->BindExpr(CE, resultVal);
+  // Record this as a possible path.
    C.addTransition(state);
  }
  
diff --git a/test/Analysis/string.c b/test/Analysis/string.c

index 98fd8c09c4113dc4971486933789953ebff9bcd6..4b6a0665d91668b587fafc0c101c7eb8eaba8376 100644 (file)
--- a/test/Analysis/string.c
+++ b/test/Analysis/string.c
@@ -542,7 +542,7 @@ void strncat_no_overflow_2(char *y) {
  //===----------------------------------------------------------------------===
  
  #define strcmp BUILTIN(strcmp)
-int strcmp(const char *restrict s1, const char *restrict s2);
+int strcmp(const char * s1, const char * s2);
  
  void strcmp_constant0() {
    if (strcmp("123", "123") != 0)
@@ -622,12 +622,22 @@ void strcmp_diff_length_3() {
      (void)*(char*)0; // no-warning
  }
  
+void strcmp_embedded_null () {
+       if (strcmp("\0z", "\0y") != 0)
+               (void)*(char*)0; // no-warning
+}
+
+void strcmp_unknown_arg (char *unknown) {
+       if (strcmp(unknown, unknown) != 0)
+               (void)*(char*)0; // no-warning
+}
+
  //===----------------------------------------------------------------------===
  // strncmp()
  //===----------------------------------------------------------------------===
  
  #define strncmp BUILTIN(strncmp)
-int strncmp(const char *restrict s1, const char *restrict s2, size_t n);
+int strncmp(const char *s1, const char *s2, size_t n);
  
  void strncmp_constant0() {
    if (strncmp("123", "123", 3) != 0)
@@ -728,12 +738,17 @@ void strncmp_diff_length_6() {
      (void)*(char*)0; // no-warning
  }
  
+void strncmp_embedded_null () {
+       if (strncmp("ab\0zz", "ab\0yy", 4) != 0)
+               (void)*(char*)0; // no-warning
+}
+
  //===----------------------------------------------------------------------===
  // strcasecmp()
  //===----------------------------------------------------------------------===
  
  #define strcasecmp BUILTIN(strcasecmp)
-int strcasecmp(const char *restrict s1, const char *restrict s2);
+int strcasecmp(const char *s1, const char *s2);
  
  void strcasecmp_constant0() {
    if (strcasecmp("abc", "Abc") != 0)
@@ -813,12 +828,17 @@ void strcasecmp_diff_length_3() {
      (void)*(char*)0; // no-warning
  }
  
+void strcasecmp_embedded_null () {
+       if (strcasecmp("ab\0zz", "ab\0yy") != 0)
+               (void)*(char*)0; // no-warning
+}
+
  //===----------------------------------------------------------------------===
  // strncasecmp()
  //===----------------------------------------------------------------------===
  
  #define strncasecmp BUILTIN(strncasecmp)
-int strncasecmp(const char *restrict s1, const char *restrict s2, size_t n);
+int strncasecmp(const char *s1, const char *s2, size_t n);
  
  void strncasecmp_constant0() {
    if (strncasecmp("abc", "Abc", 3) != 0)
@@ -918,3 +938,8 @@ void strncasecmp_diff_length_6() {
    if (strncasecmp(x, y, 3) != 1)
      (void)*(char*)0; // no-warning
  }
+
+void strncasecmp_embedded_null () {
+       if (strncasecmp("ab\0zz", "ab\0yy", 4) != 0)
+               (void)*(char*)0; // no-warning
+}
author	Jordy Rose <jediknil@belkadan.com>
	Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)
committer	Jordy Rose <jediknil@belkadan.com>
	Thu, 16 Jun 2011 07:13:34 +0000 (07:13 +0000)
lib/StaticAnalyzer/Checkers/CStringChecker.cpp		patch \| blob \| history
test/Analysis/string.c		patch \| blob \| history