From: Chris Lattner Date: Wed, 17 Nov 2010 07:37:15 +0000 (+0000) Subject: a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=08f92e3a5dead1f1ee656678a7f06e43279d6e50;p=clang a metric ton of refactoring later, Sema::getLocationOfStringLiteralByte no longer depends on Preprocessor, so we can move it out of Sema into a nice new StringLiteral::getLocationOfByte method that can be used by any AST client. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119481 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/Expr.h b/include/clang/AST/Expr.h index 89295e4c7e..74bc62943e 100644 --- a/include/clang/AST/Expr.h +++ b/include/clang/AST/Expr.h @@ -1034,6 +1034,17 @@ public: assert(TokNum < NumConcatenated && "Invalid tok number"); TokLocs[TokNum] = L; } + + /// getLocationOfByte - Return a source location that points to the specified + /// byte of this string literal. + /// + /// Strings are amazingly complex. They can be formed from multiple tokens + /// and can have escape sequences in them in addition to the usual trigraph + /// and escaped newline business. This routine handles this complexity. 
+ /// + SourceLocation getLocationOfByte(unsigned ByteNo, const SourceManager &SM, + const LangOptions &Features, + const TargetInfo &Target) const; typedef const SourceLocation *tokloc_iterator; tokloc_iterator tokloc_begin() const { return TokLocs; } diff --git a/lib/AST/Expr.cpp b/lib/AST/Expr.cpp index e36c02f3a9..b03594f8f9 100644 --- a/lib/AST/Expr.cpp +++ b/lib/AST/Expr.cpp @@ -20,7 +20,10 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" +#include "clang/Lex/LiteralSupport.h" +#include "clang/Lex/Lexer.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -467,6 +470,72 @@ void StringLiteral::setString(ASTContext &C, llvm::StringRef Str) { ByteLength = Str.size(); } +/// getLocationOfByte - Return a source location that points to the specified +/// byte of this string literal. +/// +/// Strings are amazingly complex. They can be formed from multiple tokens and +/// can have escape sequences in them in addition to the usual trigraph and +/// escaped newline business. This routine handles this complexity. +/// +SourceLocation StringLiteral:: +getLocationOfByte(unsigned ByteNo, const SourceManager &SM, + const LangOptions &Features, const TargetInfo &Target) const { + assert(!isWide() && "This doesn't work for wide strings yet"); + + // Loop over all of the tokens in this string until we find the one that + // contains the byte we're looking for. + unsigned TokNo = 0; + while (1) { + assert(TokNo < getNumConcatenated() && "Invalid byte number!"); + SourceLocation StrTokLoc = getStrTokenLoc(TokNo); + + // Get the spelling of the string so that we can get the data that makes up + // the string literal, not the identifier for the macro it is potentially + // expanded through. 
+ SourceLocation StrTokSpellingLoc = SM.getSpellingLoc(StrTokLoc); + + // Re-lex the token to get its length and original spelling. + std::pair<FileID, unsigned> LocInfo =SM.getDecomposedLoc(StrTokSpellingLoc); + bool Invalid = false; + llvm::StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + if (Invalid) + return StrTokSpellingLoc; + + const char *StrData = Buffer.data()+LocInfo.second; + + // Create a langops struct and enable trigraphs. This is sufficient for + // relexing tokens. + LangOptions LangOpts; + LangOpts.Trigraphs = true; + + // Create a lexer starting at the beginning of this token. + Lexer TheLexer(StrTokSpellingLoc, Features, Buffer.begin(), StrData, + Buffer.end()); + Token TheTok; + TheLexer.LexFromRawLexer(TheTok); + + // Use the StringLiteralParser to compute the length of the string in bytes. + StringLiteralParser SLP(&TheTok, 1, SM, Features, Target); + unsigned TokNumBytes = SLP.GetStringLength(); + + // If the byte is in this token, return the location of the byte. + if (ByteNo < TokNumBytes || + (ByteNo == TokNumBytes && TokNo == getNumConcatenated())) { + unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); + + // Now that we know the offset of the token in the spelling, use the + // preprocessor to get the offset in the original source. + return Lexer::AdvanceToTokenCharacter(StrTokLoc, Offset, SM, Features); + } + + // Move to the next string token. + ++TokNo; + ByteNo -= TokNumBytes; + } +} + + + /// getOpcodeStr - Turn an Opcode enum value into the punctuation char it /// corresponds to, e.g. "sizeof" or "[pre]++". 
const char *UnaryOperator::getOpcodeStr(Opcode Op) { diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index 0fa1fc18e3..d04addd210 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -25,7 +25,6 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" -#include "clang/Lex/LiteralSupport.h" #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -33,75 +32,16 @@ #include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetInfo.h" #include "clang/Basic/ConvertUTF.h" - #include <limits> using namespace clang; using namespace sema; -/// getLocationOfStringLiteralByte - Return a source location that points to the -/// specified byte of the specified string literal. -/// -/// Strings are amazingly complex. They can be formed from multiple tokens and -/// can have escape sequences in them in addition to the usual trigraph and -/// escaped newline business. This routine handles this complexity. -/// SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL, unsigned ByteNo) const { - assert(!SL->isWide() && "This doesn't work for wide strings yet"); - - // Loop over all of the tokens in this string until we find the one that - // contains the byte we're looking for. - unsigned TokNo = 0; - while (1) { - assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!"); - SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo); - - // Get the spelling of the string so that we can get the data that makes up - // the string literal, not the identifier for the macro it is potentially - // expanded through. - SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc); - - // Re-lex the token to get its length and original spelling. 
- std::pair<FileID, unsigned> LocInfo = - SourceMgr.getDecomposedLoc(StrTokSpellingLoc); - bool Invalid = false; - llvm::StringRef Buffer = SourceMgr.getBufferData(LocInfo.first, &Invalid); - if (Invalid) - return StrTokSpellingLoc; - - const char *StrData = Buffer.data()+LocInfo.second; - - // Create a langops struct and enable trigraphs. This is sufficient for - // relexing tokens. - LangOptions LangOpts; - LangOpts.Trigraphs = true; - - // Create a lexer starting at the beginning of this token. - Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.begin(), StrData, - Buffer.end()); - Token TheTok; - TheLexer.LexFromRawLexer(TheTok); - - // Use the StringLiteralParser to compute the length of the string in bytes. - StringLiteralParser SLP(&TheTok, 1, PP.getSourceManager(), - PP.getLangOptions(), PP.getTargetInfo()); - unsigned TokNumBytes = SLP.GetStringLength(); - - // If the byte is in this token, return the location of the byte. - if (ByteNo < TokNumBytes || - (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) { - unsigned Offset = SLP.getOffsetOfStringByte(TheTok, ByteNo); - - // Now that we know the offset of the token in the spelling, use the - // preprocessor to get the offset in the original source. - return PP.AdvanceToTokenCharacter(StrTokLoc, Offset); - } - - // Move to the next string token. - ++TokNo; - ByteNo -= TokNumBytes; - } + return SL->getLocationOfByte(ByteNo, PP.getSourceManager(), + PP.getLangOptions(), PP.getTargetInfo()); } + /// CheckablePrintfAttr - does a function call have a "printf" attribute /// and arguments that merit checking?