From aca25bccefe56121b686706afc84c8cb5d46e65b Mon Sep 17 00:00:00 2001 From: Anna Zaks Date: Wed, 27 Jul 2011 21:43:43 +0000 Subject: [PATCH] Add a utility function to the Lexer, which makes it easier to find a token after the given location. (It is a generalized version of trans::findLocationAfterSemi from ArcMigrate, which will be changed to use the Lexer utility). git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@136268 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Lex/Lexer.h | 12 ++++++++ lib/Lex/Lexer.cpp | 59 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 2c25597433..e24fe9c9ab 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -456,6 +456,18 @@ public: /// them), skip over them and return the first non-escaped-newline found, /// otherwise return P. static const char *SkipEscapedNewLines(const char *P); + + /// \brief Checks that the given token is the first token that occurs after + /// the given location (this excludes comments and whitespace). Returns the + /// location immediately after the specified token. If the token is not found + /// or the location is inside a macro, the returned source location will be + /// invalid. + static SourceLocation findLocationAfterToken(SourceLocation loc, + tok::TokenKind TKind, + const SourceManager &SM, + const LangOptions &LangOpts, + bool SkipTrailingWhitespaceAndNewLine); + private: /// getCharAndSizeSlowNoWarn - Same as getCharAndSizeSlow, but never emits a diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 44674a93d7..0664cbc21b 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -868,6 +868,12 @@ static inline bool isHorizontalWhitespace(unsigned char c) { return (CharInfo[c] & CHAR_HORZ_WS) ? true : false; } +/// isVerticalWhitespace - Return true if this character is vertical +/// whitespace: '\n', '\r'. Note that this returns false for '\0'. +static inline bool isVerticalWhitespace(unsigned char c) { + return (CharInfo[c] & CHAR_VERT_WS) ? true : false; +} + /// isWhitespace - Return true if this character is horizontal or vertical /// whitespace: ' ', '\t', '\f', '\v', '\n', '\r'. Note that this returns false /// for '\0'. @@ -1027,6 +1033,59 @@ const char *Lexer::SkipEscapedNewLines(const char *P) { } } +/// \brief Checks that the given token is the first token that occurs after the +/// given location (this excludes comments and whitespace). Returns the location +/// immediately after the specified token. If the token is not found or the +/// location is inside a macro, the returned source location will be invalid. +SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc, + tok::TokenKind TKind, + const SourceManager &SM, + const LangOptions &LangOpts, + bool SkipTrailingWhitespaceAndNewLine) { + if (Loc.isMacroID()) { + if (!Lexer::isAtEndOfMacroExpansion(Loc, SM, LangOpts)) + return SourceLocation(); + Loc = SM.getExpansionRange(Loc).second; + } + Loc = Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); + + // Break down the source location. + std::pair LocInfo = SM.getDecomposedLoc(Loc); + + // Try to load the file buffer. + bool InvalidTemp = false; + llvm::StringRef File = SM.getBufferData(LocInfo.first, &InvalidTemp); + if (InvalidTemp) + return SourceLocation(); + + const char *TokenBegin = File.data() + LocInfo.second; + + // Lex from the start of the given location. + Lexer lexer(SM.getLocForStartOfFile(LocInfo.first), LangOpts, File.begin(), + TokenBegin, File.end()); + // Find the token. + Token Tok; + lexer.LexFromRawLexer(Tok); + if (Tok.isNot(TKind)) + return SourceLocation(); + SourceLocation TokenLoc = Tok.getLocation(); + + // Calculate how much whitespace needs to be skipped if any. + unsigned NumWhitespaceChars = 0; + if (SkipTrailingWhitespaceAndNewLine) { + const char *TokenEnd = SM.getCharacterData(TokenLoc) + + Tok.getLength(); + unsigned char C = *TokenEnd; + while (isHorizontalWhitespace(C)) { + C = *(++TokenEnd); + NumWhitespaceChars++; + } + if (isVerticalWhitespace(C)) + NumWhitespaceChars++; + } + + return TokenLoc.getFileLocWithOffset(Tok.getLength() + NumWhitespaceChars); +} /// getCharAndSizeSlow - Peek a single 'character' from the specified buffer, /// get its size, and return it. This is tricky in several cases: -- 2.40.0