From: Chris Lattner <sabre@nondot.org>
Date: Wed, 17 Nov 2010 07:37:15 +0000 (+0000)
Subject: a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=08f92e3a5dead1f1ee656678a7f06e43279d6e50;p=clang

a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte
no longer depends on Preprocessor, so we can move it out of Sema into
a nice new StringLiteral::getLocationOfByte method that can be used by
any AST client.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119481 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h
index 89295e4c7e..74bc62943e 100644
--- a/include/clang/AST/Expr.h
+++ b/include/clang/AST/Expr.h
@@ -1034,6 +1034,17 @@ public:
     assert(TokNum < NumConcatenated && "Invalid tok number");
     TokLocs[TokNum] = L;
   }
+  
+  /// getLocationOfByte - Return a source location that points to the specified
+  /// byte of this string literal.  Wide strings are not supported yet.
+  ///
+  /// Strings are amazingly complex.  They can be formed from multiple tokens
+  /// and can have escape sequences in them in addition to the usual trigraph
+  /// and escaped newline business.  This routine handles this complexity.
+  ///
+  SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                                   const LangOptions &Features,
+                                   const TargetInfo &Target) const;
 
   typedef const SourceLocation *tokloc_iterator;
   tokloc_iterator tokloc_begin() const { return TokLocs; }
diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp
index e36c02f3a9..b03594f8f9 100644
--- a/lib/AST/Expr.cpp
+++ b/lib/AST/Expr.cpp
@@ -20,7 +20,10 @@
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
+#include "clang/Lex/LiteralSupport.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Basic/Builtins.h"
+#include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
@@ -467,6 +470,72 @@ void StringLiteral::setString(ASTContext &C, llvm::StringRef Str) {
   ByteLength = Str.size();
 }
 
+/// getLocationOfByte - Return a source location that points to the specified
+/// byte of this string literal.
+///
+/// Strings are amazingly complex.  They can be formed from multiple tokens and
+/// can have escape sequences in them in addition to the usual trigraph and
+/// escaped newline business.  This routine handles this complexity.
+///
+/// ByteNo indexes the concatenated literal; the one-past-the-end byte of the
+/// last token is accepted.  If a token's buffer cannot be read, that token's
+/// spelling location is returned.  Wide strings are not supported yet.
+SourceLocation StringLiteral::
+getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
+                  const LangOptions &Features, const TargetInfo &Target) const {
+  assert(!isWide() && "This doesn't work for wide strings yet");
+  
+  // Loop over all of the tokens in this string until we find the one that
+  // contains the byte we're looking for.
+  unsigned TokNo = 0;
+  while (1) {
+    assert(TokNo < getNumConcatenated() && "Invalid byte number!");
+    SourceLocation StrTokLoc = getStrTokenLoc(TokNo);
+    
+    // Get the spelling of the string so that we can get the data that makes up
+    // the string literal, not the identifier for the macro it is potentially
+    // expanded through.
+    SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc);
+    
+    // Re-lex the token to get its length and original spelling.
+    std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc);
+    bool Invalid = false;
+    llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid);
+    if (Invalid)
+      return StrTokSpellingLoc;
+    
+    const char *StrData = Buffer.data()+LocInfo.second;
+    
+    // Create a lexer starting at the beginning of this token.  It is driven
+    // by the caller's Features, so no separate LangOptions is built here.
+    Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData,
+                   Buffer.end());
+    Token TheTok;
+    TheLexer.LexFromRawLexer(TheTok);
+    
+    // Use the StringLiteralParser to compute the length of the string in bytes.
+    StringLiteralParser SLP(&TheTok, 1, SM, Features, Target);
+    unsigned TokNumBytes = SLP.GetStringLength();
+    
+    // If the byte is in this token, return the location of the byte.  Only the
+    // last token (index getNumConcatenated()-1) owns its one-past-the-end byte.
+    if (ByteNo < TokNumBytes ||
+        (ByteNo == TokNumBytes && TokNo == getNumConcatenated()-1)) {
+      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo);
+      
+      // Now that we know the offset of the token in the spelling, use the
+      // lexer to get the offset in the original source.
+      return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features);
+    }
+    
+    // Move to the next string token.
+    ++TokNo;
+    ByteNo -= TokNumBytes;
+  }
+}
+
+
+
 /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it
 /// corresponds to, e.g. "sizeof" or "[pre]++".
 const char *UnaryOperator::getOpcodeStr(Opcode Op) {
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index 0fa1fc18e3..d04addd210 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -25,7 +25,6 @@
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/StmtCXX.h"
 #include "clang/AST/StmtObjC.h"
-#include "clang/Lex/LiteralSupport.h"
 #include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -33,75 +32,16 @@
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/ConvertUTF.h"
-
 #include <limits>
 using namespace clang;
 using namespace sema;
 
-/// getLocationOfStringLiteralByte - Return a source location that points to the
-/// specified byte of the specified string literal.
-///
-/// Strings are amazingly complex.  They can be formed from multiple tokens and
-/// can have escape sequences in them in addition to the usual trigraph and
-/// escaped newline business.  This routine handles this complexity.
-///
 SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
                                                     unsigned ByteNo) const {
-  assert(!SL->isWide() && "This doesn't work for wide strings yet");
-
-  // Loop over all of the tokens in this string until we find the one that
-  // contains the byte we're looking for.
-  unsigned TokNo = 0;
-  while (1) {
-    assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
-    SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
-
-    // Get the spelling of the string so that we can get the data that makes up
-    // the string literal, not the identifier for the macro it is potentially
-    // expanded through.
-    SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
-
-    // Re-lex the token to get its length and original spelling.
-    std::pair<FileID, unsigned> LocInfo =
-      SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
-    bool Invalid = false;
-    llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid);
-    if (Invalid)
-      return StrTokSpellingLoc;
-      
-    const char *StrData = Buffer.data()+LocInfo.second;
-
-    // Create a langops struct and enable trigraphs.  This is sufficient for
-    // relexing tokens.
-    LangOptions LangOpts;
-    LangOpts.Trigraphs = true;
-
-    // Create a lexer starting at the beginning of this token.
-    Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData,
-                   Buffer.end());
-    Token TheTok;
-    TheLexer.LexFromRawLexer(TheTok);
-
-    // Use the StringLiteralParser to compute the length of the string in bytes.
-    StringLiteralParser SLP(&TheTok, 1, PP.getSourceManager(),
-                            PP.getLangOptions(), PP.getTargetInfo());
-    unsigned TokNumBytes = SLP.GetStringLength();
-
-    // If the byte is in this token, return the location of the byte.
-    if (ByteNo < TokNumBytes ||
-        (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
-      unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); 
-
-      // Now that we know the offset of the token in the spelling, use the
-      // preprocessor to get the offset in the original source.
-      return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
-    }
-
-    // Move to the next string token.
-    ++TokNo;
-    ByteNo -= TokNumBytes;
-  }
+  return SL->getLocationOfByte(ByteNo, PP.getSourceManager(),
+                               PP.getLangOptions(), PP.getTargetInfo());
 }
+  
 
 /// CheckablePrintfAttr - does a function call have a "printf" attribute
 /// and arguments that merit checking?