Next step toward making string diagnostics correct: handle

author Chris Lattner <sabre@nondot.org>

Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)

committer Chris Lattner <sabre@nondot.org>

Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)
author Chris Lattner <sabre@nondot.org>
Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)
committer Chris Lattner <sabre@nondot.org>
Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)
diff --git a/include/clang/Lex/LiteralSupport.h b/include/clang/Lex/LiteralSupport.h

index 06068bad7b97a121ca4f9de833f8627ceeee61a0..82a1f14ad871e6b70a9fb5a337e46c138aa166bf 100644 (file)
--- a/include/clang/Lex/LiteralSupport.h
+++ b/include/clang/Lex/LiteralSupport.h
@@ -156,6 +156,12 @@ public:
    
    const char *GetString() { return &ResultBuf[0]; }
    unsigned GetStringLength() { return ResultPtr-&ResultBuf[0]; }
+  
+  /// getOffsetOfStringByte - Return the offset, within the spelling of the
+  /// string token TheTok, of byte number ByteNo of the token's string data.
+  /// Escape sequences in the spelling are advanced over as whole units.
+  static unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo,
+                                        Preprocessor &PP);
  };
    
  }  // end namespace clang
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp

index 93b653aae6f29a80ebd369bc87b2ef1cac07a763..9815f9b91e7b9015f620250c77c2453ba3fe04ba 100644 (file)
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -798,3 +798,49 @@ StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
      return;
    }
  }
+
+/// getOffsetOfStringByte - Return the offset, from the start of Tok's spelling
+/// (opening quote included), of byte number ByteNo of the string data that Tok
+/// represents.  An escape sequence in the spelling is stepped over as a unit
+/// and counted as one byte.  Asserts on wide or erroneous string tokens.
+unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
+                                                    unsigned ByteNo,
+                                                    Preprocessor &PP) {
+  // Get the spelling of the token; the buffer is pre-sized to the token length.
+  llvm::SmallString<16> SpellingBuffer;
+  SpellingBuffer.resize(Tok.getLength());
+  // NOTE(review): getSpelling presumably may repoint SpellingPtr - confirm.
+  const char *SpellingPtr = &SpellingBuffer[0];
+  unsigned TokLen = PP.getSpelling(Tok, SpellingPtr);
+  // A spelling starting with 'L' is treated as a wide string: unsupported.
+  assert(SpellingPtr[0] != 'L' && "Doesn't handle wide strings yet");
+  // Remember where the spelling begins and ends.  The returned offset is
+  // measured from SpellingStart, i.e. from before the opening quote.
+  const char *SpellingStart = SpellingPtr;
+  const char *SpellingEnd = SpellingPtr+TokLen;
+
+  // Skip over the leading quote.
+  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
+  ++SpellingPtr;
+  
+  // Consume one string byte per iteration until ByteNo bytes are skipped.
+  while (ByteNo) {
+    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
+    
+    // A non-escape is one character of spelling and one byte of string data.
+    if (*SpellingPtr != '\\') {
+      ++SpellingPtr;
+      --ByteNo;
+      continue;
+    }
+    // Otherwise, this is an escape sequence.  ProcessCharEscape is relied on
+    // to advance SpellingPtr past it; the whole escape counts as one byte.
+    bool HadError = false;
+    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
+                      Tok.getLocation(), false, PP);
+    assert(!HadError && "This method isn't valid on erroneous strings");
+    --ByteNo;
+  }
+  // SpellingPtr is now at the requested byte of the spelling.
+  return SpellingPtr-SpellingStart;
+}
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp

index 38cc427a0044a0777aadd0aeb440cb1343e64c63..b22932bb9a9fd2a093720392c284c1bc9203d988 100644 (file)
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -17,6 +17,7 @@
  #include "clang/AST/DeclObjC.h"
  #include "clang/AST/ExprCXX.h"
  #include "clang/AST/ExprObjC.h"
+#include "clang/Lex/LiteralSupport.h"
  #include "clang/Lex/Preprocessor.h"
  using namespace clang;
  
@@ -31,7 +32,7 @@ SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                      unsigned ByteNo) const {
    assert(!SL->isWide() && "This doesn't work for wide strings yet");
    
-  llvm::SmallString<32> SpellingBuffer;
+  llvm::SmallString<16> SpellingBuffer;
    
    // Loop over all of the tokens in this string until we find the one that
    // contains the byte we're looking for.
@@ -78,13 +79,15 @@ SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
      // The length of the string is the token length minus the two quotes.
      TokNumBytes -= 2;
  
-    // FIXME: This should consider character escapes!
-
      // If the byte is in this token, return the location of the byte.
      if (ByteNo < TokNumBytes ||
          (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
-      // We advance +1 to step over the '"'.
-      return PP.AdvanceToTokenCharacter(StrTokLoc, ByteNo+1);
+      unsigned Offset = 
+        StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
+     
+      // Now that we know the offset of the byte in the token's spelling, use
+      // the preprocessor to get the corresponding location in the source.
+      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
      }
      
      // Move to the next string token.
author	Chris Lattner <sabre@nondot.org>
	Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Wed, 18 Feb 2009 19:21:10 +0000 (19:21 +0000)
include/clang/Lex/LiteralSupport.h		patch \| blob \| history
lib/Lex/LiteralSupport.cpp		patch \| blob \| history
lib/Sema/SemaChecking.cpp		patch \| blob \| history