a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte

author Chris Lattner <sabre@nondot.org>

Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)

committer Chris Lattner <sabre@nondot.org>

Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)
author Chris Lattner <sabre@nondot.org>
Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)
committer Chris Lattner <sabre@nondot.org>
Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)
diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h

index 89295e4c7e7b75dd482f0ffbcc9eeb1674d5a8f2..74bc62943e00debe508266b9c01e3a6301324362 100644 (file)
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1034,6 +1034,17 @@ public:
      assert(TokNum < NumConcatenated && "Invalid tok number");
      TokLocs[TokNum] = L;
    }
+  
+  /// getLocationOfByte - Return a source location that points to the specified
+  /// byte of this string literal.
+  ///
+  /// Strings are amazingly complex.  They can be formed from multiple tokens
+  /// and can have escape sequences in them in addition to the usual trigraph
+  /// and escaped newline business.  This routine handles this complexity.
+  ///
+  SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                                   const LangOptions &Features,
+                                   const TargetInfo &Target) const;
  
    typedef const SourceLocation *tokloc_iterator;
    tokloc_iterator tokloc_begin() const { return TokLocs; }
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp

index e36c02f3a992c885dcafb468ee366eb239bdddea..b03594f8f98b356de5c684dc943c70231bf65c09 100644 (file)
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -20,7 +20,10 @@
  #include "clang/AST/DeclTemplate.h"
  #include "clang/AST/RecordLayout.h"
  #include "clang/AST/StmtVisitor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Lexer.h"
  #include "clang/Basic/Builtins.h"
+#include "clang/Basic/SourceManager.h"
  #include "clang/Basic/TargetInfo.h"
  #include "llvm/Support/ErrorHandling.h"
  #include "llvm/Support/raw_ostream.h"
@@ -467,6 +470,72 @@ void StringLiteral::setString(ASTContext &C, llvm::StringRef Str) {
    ByteLength = Str.size();
  }
  
+/// getLocationOfByte - Return a source location that points to the specified
+/// byte of this string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles this complexity.
+///
+/// ByteNo counts bytes across all concatenated tokens; the position one past
+/// the final byte is resolved within the last token.
+///
+SourceLocation StringLiteral::
+getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                  const LangOptions &Features, const TargetInfo &Target) const {
+  assert(!isWide() && "This doesn't work for wide strings yet");
+  
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
+    
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
+    
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
+    bool Invalid = false;
+    llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+    if (Invalid)
+      return StrTokSpellingLoc;
+    
+    const char *StrData = Buffer.data()+LocInfo.second;
+    
+    // Create a lexer starting at the beginning of this token.
+    Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData,
+                   Buffer.end());
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+    
+    // Use the StringLiteralParser to compute the length of the string in bytes.
+    StringLiteralParser SLP(&TheTok, 1, SM, Features, Target);
+    unsigned TokNumBytes = SLP.GetStringLength();
+    
+    // If the byte is in this token, return the location of the byte.  TokNo is
+    // zero-based, so the last token is getNumConcatenated()-1; only that token
+    // may claim the one-past-the-end byte.
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == getNumConcatenated()-1)) {
+      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); 
+      
+      // Now that we know the offset of the token in the spelling, use the
+      // lexer to get the offset in the original source.
+      return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
+    }
+    
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
+
  /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it
  /// corresponds to, e.g. "sizeof" or "[pre]++".
  const char *UnaryOperator::getOpcodeStr(Opcode Op) {
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp

index 0fa1fc18e309ddd8c18f4cfcc5e0757e61ab8795..d04addd2103fd8e77cd1663723ba13b984c12791 100644 (file)
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -25,7 +25,6 @@
  #include "clang/AST/DeclObjC.h"
  #include "clang/AST/StmtCXX.h"
  #include "clang/AST/StmtObjC.h"
-#include "clang/Lex/LiteralSupport.h"
  #include "clang/Lex/Preprocessor.h"
  #include "llvm/ADT/BitVector.h"
  #include "llvm/ADT/STLExtras.h"
@@ -33,75 +32,16 @@
  #include "clang/Basic/TargetBuiltins.h"
  #include "clang/Basic/TargetInfo.h"
  #include "clang/Basic/ConvertUTF.h"
-
  #include <limits>
  using namespace clang;
  using namespace sema;
  
-/// getLocationOfStringLiteralByte - Return a source location that points to the
-/// specified byte of the specified string literal.
-///
-/// Strings are amazingly complex.  They can be formed from multiple tokens and
-/// can have escape sequences in them in addition to the usual trigraph and
-/// escaped newline business.  This routine handles this complexity.
-///
  SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                      unsigned ByteNo) const {
-  assert(!SL->isWide() && "This doesn't work for wide strings yet");
-
-  // Loop over all of the tokens in this string until we find the one that
-  // contains the byte we're looking for.
-  unsigned TokNo = 0;
-  while (1) {
-    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
-    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
-
-    // Get the spelling of the string so that we can get the data that makes up
-    // the string literal, not the identifier for the macro it is potentially
-    // expanded through.
-    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
-
-    // Re-lex the token to get its length and original spelling.
-    std::pair<FileID, unsigned> LocInfo =
-      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
-    bool Invalid = false;
-    llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid);
-    if (Invalid)
-      return StrTokSpellingLoc;
-      
-    const char *StrData = Buffer.data()+LocInfo.second;
-
-    // Create a langops struct and enable trigraphs.  This is sufficient for
-    // relexing tokens.
-    LangOptions LangOpts;
-    LangOpts.Trigraphs = true;
-
-    // Create a lexer starting at the beginning of this token.
-    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData,
-                   Buffer.end());
-    Token TheTok;
-    TheLexer.LexFromRawLexer(TheTok);
-
-    // Use the StringLiteralParser to compute the length of the string in bytes.
-    StringLiteralParser SLP(&TheTok, 1, PP.getSourceManager(),
-                            PP.getLangOptions(), PP.getTargetInfo());
-    unsigned TokNumBytes = SLP.GetStringLength();
-
-    // If the byte is in this token, return the location of the byte.
-    if (ByteNo < TokNumBytes ||
-        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
-      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); 
-
-      // Now that we know the offset of the token in the spelling, use the
-      // preprocessor to get the offset in the original source.
-      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
-    }
-
-    // Move to the next string token.
-    ++TokNo;
-    ByteNo -= TokNumBytes;
-  }
+  return SL->getLocationOfByte(ByteNo, PP.getSourceManager(),
+                               PP.getLangOptions(), PP.getTargetInfo());
  }
+  
  
  /// CheckablePrintfAttr - does a function call have a "printf" attribute
  /// and arguments that merit checking?
author	Chris Lattner <sabre@nondot.org>
	Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Wed, 17 Nov 2010 07:37:15 +0000 (07:37 +0000)
include/clang/AST/Expr.h		patch \| blob \| history
lib/AST/Expr.cpp		patch \| blob \| history
lib/Sema/SemaChecking.cpp		patch \| blob \| history