Start improving diagnostics that relate to subcharacters of string literals.

author Chris Lattner <sabre@nondot.org>

Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)

committer Chris Lattner <sabre@nondot.org>

Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)
author Chris Lattner <sabre@nondot.org>
Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)
committer Chris Lattner <sabre@nondot.org>
Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h

index ffdd996cedccec945c1fbc17d7bb641c5f2a6c6a..f06fe098172593ce3ea5a071c39d6afebf7d3802 100644 (file)
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -520,7 +520,6 @@ public:
    typedef const SourceLocation *tokloc_iterator;
    tokloc_iterator tokloc_begin() const { return TokLocs; }
    tokloc_iterator tokloc_end() const { return TokLocs+NumConcatenated; }
-  
  
    virtual SourceRange getSourceRange() const { 
      return SourceRange(TokLocs[0], TokLocs[NumConcatenated-1]); 
diff --git a/lib/Sema/Sema.h b/lib/Sema/Sema.h

index a748e5c028cce3d00868c4686edddc41a6737193..4358051cc52764fb933d7e95c979a85147da7f0e 100644 (file)
--- a/lib/Sema/Sema.h
+++ b/lib/Sema/Sema.h
@@ -1989,6 +1989,8 @@ public:
  private:
    Action::OwningExprResult CheckFunctionCall(FunctionDecl *FDecl,
                                               CallExpr *TheCall);
+  SourceLocation getLocationOfStringLiteralByte(const StringLiteral *SL,
+                                                unsigned ByteNo) const;
    bool CheckObjCString(Expr *Arg);
    bool SemaBuiltinVAStart(CallExpr *TheCall);
    bool SemaBuiltinUnorderedCompare(CallExpr *TheCall);
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp

index db622f66483e01c810376f424335bcfae5b9e0d3..f469684e50323a3f6e555e4782a3c5a051bb2c8b 100644 (file)
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -20,6 +20,71 @@
  #include "clang/Lex/Preprocessor.h"
  using namespace clang;
  
+/// getLocationOfStringLiteralByte - Return a source location that points to the
+/// specified byte of the specified string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles this complexity.
+///
+SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
+                                                    unsigned ByteNo) const {
+  assert(!SL->isWide() && "This doesn't work for wide strings yet");
+  
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
+   
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
+
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo =
+      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
+    std::pair<const char *,const char *> Buffer =
+      SourceMgr.getBufferData(LocInfo.first);
+    const char *StrData = Buffer.first+LocInfo.second;
+    
+    // Create a langops struct and enable trigraphs.  This is sufficient for
+    // relexing tokens.
+    LangOptions LangOpts;
+    LangOpts.Trigraphs = true;
+    
+    // Create a lexer starting at the beginning of this token.
+    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
+                   Buffer.second);
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+    
+    // The length of the string is the token length minus the two quotes.
+    unsigned TokNumBytes = TheTok.getLength()-2;
+    
+    // If we found the token we're looking for, return the location.
+    // FIXME: This should consider character escapes!
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
+      // If the original token came from a macro expansion, just return the
+      // start of the token.  We don't want to magically jump to the spelling
+      // for a diagnostic.  We do the above business in case some tokens come
+      // from a macro expansion but others don't.
+      if (!StrTokLoc.isFileID()) return StrTokLoc;
+      
+      // We advance +1 to step over the '"'.
+      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
+    }
+    
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
  /// CheckFunctionCall - Check a direct function call for various correctness
  /// and safety properties not strictly enforced by the C type system.
  Action::OwningExprResult
@@ -108,14 +173,14 @@ bool Sema::CheckObjCString(Expr *Arg) {
    
    for (unsigned i = 0; i < Length; ++i) {
      if (!isascii(Data[i])) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
             diag::warn_cfstring_literal_contains_non_ascii_character)
          << Arg->getSourceRange();
        break;
      }
      
      if (!Data[i]) {
-      Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
+      Diag(getLocationOfStringLiteralByte(Literal, i),
             diag::warn_cfstring_literal_contains_nul_character)
          << Arg->getSourceRange();
        break;
@@ -565,7 +630,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
      if (Str[StrIdx] == '\0') {
        // The string returned by getStrData() is not null-terminated,
        // so the presence of a null character is likely an error.
-      Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
+      Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
             diag::warn_printf_format_string_contains_null_char)
          <<  OrigFormatExpr->getSourceRange();
        return;
@@ -587,8 +652,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
        ++numConversions;
        
        if (!HasVAListArg && numConversions > numDataArgs) {
-        SourceLocation Loc = FExpr->getLocStart();
-        Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
+        SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
  
          if (Str[StrIdx-1] == '.')
            Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
@@ -607,8 +671,7 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
          if (BT->getKind() == BuiltinType::Int)
            break;
  
-      SourceLocation Loc =
-        PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
+      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
        
        if (Str[StrIdx-1] == '.')
          Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
@@ -655,8 +718,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
      case 'n': {
        ++numConversions;
        CurrentState = state_OrdChr;
-      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
-                                                      LastConversionIdx+1);
+      SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
+                                                          LastConversionIdx);
                                     
        Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
        break;
@@ -669,8 +732,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
          CurrentState = state_OrdChr; 
        else {
          // Issue a warning: invalid format conversion.
-        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
-                                                    LastConversionIdx+1);
+        SourceLocation Loc = 
+          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
      
          Diag(Loc, diag::warn_printf_invalid_conversion)
            <<  std::string(Str+LastConversionIdx,
@@ -690,8 +753,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
          CurrentState = state_OrdChr; 
        else {
          // Issue a warning: invalid format conversion.
-        SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
-                                                        LastConversionIdx+1);
+        SourceLocation Loc =
+          getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
              
          Diag(Loc, diag::warn_printf_invalid_conversion)
            << std::string(Str+LastConversionIdx, Str+StrIdx)
@@ -713,8 +776,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
  
    if (CurrentState == state_Conversion) {
      // Issue a warning: invalid format conversion.
-    SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
-                                                    LastConversionIdx+1);
+    SourceLocation Loc =
+      getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
      
      Diag(Loc, diag::warn_printf_invalid_conversion)
        << std::string(Str+LastConversionIdx,
@@ -727,8 +790,8 @@ void Sema::CheckPrintfString(StringLiteral *FExpr, Expr *OrigFormatExpr,
      // CHECK: Does the number of format conversions exceed the number
      //        of data arguments?
      if (numConversions > numDataArgs) {
-      SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
-                                                      LastConversionIdx);
+      SourceLocation Loc =
+        getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
                                     
        Diag(Loc, diag::warn_printf_insufficient_data_args)
          << OrigFormatExpr->getSourceRange();
diff --git a/test/SemaObjC/exprs.m b/test/SemaObjC/exprs.m

index cb7f723f39632351d950c52d1bff813849468302..3918923409eeb709c3f10d63f8dafebfbf315a3d 100644 (file)
--- a/test/SemaObjC/exprs.m
+++ b/test/SemaObjC/exprs.m
@@ -1,7 +1,19 @@
  // RUN: clang %s -fsyntax-only -verify
  
  // rdar://6597252
-Class foo(Class X) {
+Class test1(Class X) {
    return 1 ? X : X;
  }
  
+
+// rdar://6079877 - the NUL warning must be reported on the line with the \0.
+void test2() {
+  id str = @"foo" 
+          "bar\0"    // expected-warning {{literal contains NUL character}}
+          @"baz"  " blarg";
+  id str2 = @"foo" 
+            "bar"
+           @"baz"
+           " b\0larg";  // expected-warning {{literal contains NUL character}}
+  
+}
+\ No newline at end of file
author	Chris Lattner <sabre@nondot.org>
	Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Wed, 18 Feb 2009 17:49:48 +0000 (17:49 +0000)
include/clang/AST/Expr.h		patch \| blob \| history
lib/Sema/Sema.h		patch \| blob \| history
lib/Sema/SemaChecking.cpp		patch \| blob \| history
test/SemaObjC/exprs.m		patch \| blob \| history