From: Chris Lattner Date: Wed, 17 Nov 2010 07:05:50 +0000 (+0000) Subject: move AdvanceToTokenCharacter and getLocForEndOfToken from X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7ef5c27eb6e8ebe58b52013246c06753c3613263;p=clang move AdvanceToTokenCharacter and getLocForEndOfToken from Preprocessor to Lexer where they make more sense. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119474 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/docs/InternalsManual.html b/docs/InternalsManual.html index cf89c190f9..0720329cbd 100644 --- a/docs/InternalsManual.html +++ b/docs/InternalsManual.html @@ -565,7 +565,7 @@ x = foo + bar; representation, the 'last' location needs to be adjusted to point to (or past) the end of that token with either Lexer::MeasureTokenLength() or -Preprocessor::getLocForEndOfToken(). For the rare cases +Lexer::getLocForEndOfToken(). For the rare cases where character-level source ranges information is needed we use the CharSourceRange class.

diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h index 35f27fbebd..da2ec15e3f 100644 --- a/include/clang/Basic/SourceLocation.h +++ b/include/clang/Basic/SourceLocation.h @@ -121,7 +121,6 @@ public: /// directly. unsigned getRawEncoding() const { return ID; } - /// getFromRawEncoding - Turn a raw encoding of a SourceLocation object into /// a real SourceLocation. static SourceLocation getFromRawEncoding(unsigned Encoding) { diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 9e0fb7ee70..0237969a2d 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -228,6 +228,33 @@ public: const SourceManager &SM, const LangOptions &LangOpts); + /// AdvanceToTokenCharacter - If the current SourceLocation specifies a + /// location at the start of a token, return a new location that specifies a + /// character within the token. This handles trigraphs and escaped newlines. + static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned Character, + const SourceManager &SM, + const LangOptions &Features); + + /// \brief Computes the source location just past the end of the + /// token at this source location. + /// + /// This routine can be used to produce a source location that + /// points just past the end of the token referenced by \p Loc, and + /// is generally used when a diagnostic needs to point just after a + /// token where it expected something different than it received. If + /// the returned source location would not be meaningful (e.g., if + /// it points into a macro), this routine returns an invalid + /// source location. + /// + /// \param Offset an offset from the end of the token, where the source + /// location should refer to. The default offset (0) produces a source + /// location pointing just past the end of the token; an offset of 1 produces + /// a source location pointing to the last character in the token, etc. 
+ static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, + const SourceManager &SM, + const LangOptions &Features); + /// \brief Compute the preamble of the given file. /// /// The preamble of a file contains the initial comments, include directives, diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index c3e7349011..c45a75fc62 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -717,7 +717,9 @@ public: /// location should refer to. The default offset (0) produces a source /// location pointing just past the end of the token; an offset of 1 produces /// a source location pointing to the last character in the token, etc. - SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0); + SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) { + return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, Features); + } /// DumpToken - Print the token to stderr, used for debugging. /// @@ -729,12 +731,8 @@ public: /// token, return a new location that specifies a character within the token. 
SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Char) const { - return AdvanceToTokenCharacter(FullSourceLoc(TokStart, SourceMgr), Char, - Features); + return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, Features); } - static FullSourceLoc AdvanceToTokenCharacter(FullSourceLoc TokStart, - unsigned Char, - const LangOptions &Features); /// IncrementPasteCounter - Increment the counters for the number of token diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index ad3d35af6b..3e68875768 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -444,6 +444,83 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { : TheTok.isAtStartOfLine()); } + +/// AdvanceToTokenCharacter - Given a location that specifies the start of a +/// token, return a new location that specifies a character within the token. +SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned CharNo, + const SourceManager &SM, + const LangOptions &Features) { + // Figure out how many physical characters away the specified instantiation + // character is. This needs to take into consideration newlines and + // trigraphs. + bool Invalid = false; + const char *TokPtr = SM.getCharacterData(TokStart, &Invalid); + + // If they request the first char of the token, we're trivially done. + if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) + return TokStart; + + unsigned PhysOffset = 0; + + // The usual case is that tokens don't contain anything interesting. Skip + // over the uninteresting characters. If a token only consists of simple + // chars, this method is extremely fast. + while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { + if (CharNo == 0) + return TokStart.getFileLocWithOffset(PhysOffset); + ++TokPtr, --CharNo, ++PhysOffset; + } + + // If we have a character that may be a trigraph or escaped newline, use a + // lexer to parse it correctly. 
+ for (; CharNo; --CharNo) { + unsigned Size; + Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); + TokPtr += Size; + PhysOffset += Size; + } + + // Final detail: if we end up on an escaped newline, we want to return the + // location of the actual byte of the token. For example foo\<newline>bar + // advanced by 3 should return the location of b, not of \\. One compounding + // detail of this is that the escape may be made by a trigraph. + if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) + PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; + + return TokStart.getFileLocWithOffset(PhysOffset); +} + +/// \brief Computes the source location just past the end of the +/// token at this source location. +/// +/// This routine can be used to produce a source location that +/// points just past the end of the token referenced by \p Loc, and +/// is generally used when a diagnostic needs to point just after a +/// token where it expected something different than it received. If +/// the returned source location would not be meaningful (e.g., if +/// it points into a macro), this routine returns an invalid +/// source location. +/// +/// \param Offset an offset from the end of the token, where the source +/// location should refer to. The default offset (0) produces a source +/// location pointing just past the end of the token; an offset of 1 produces +/// a source location pointing to the last character in the token, etc. +SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset, + const SourceManager &SM, + const LangOptions &Features) { + if (Loc.isInvalid() || !Loc.isFileID()) + return SourceLocation(); + + unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features); + if (Len > Offset) + Len = Len - Offset; + else + return Loc; + + return AdvanceToTokenCharacter(Loc, Len, SM, Features); +} + //===----------------------------------------------------------------------===// // Character information. 
//===----------------------------------------------------------------------===// diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp index 8ad1669647..4a23355642 100644 --- a/lib/Lex/LiteralSupport.cpp +++ b/lib/Lex/LiteralSupport.cpp @@ -194,9 +194,11 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd, // If we didn't consume the proper number of digits, there is a problem. if (UcnLenSave) { if (Diags) { - Loc = Preprocessor::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin, - Features); - Diags->Report(Loc, diag::err_ucn_escape_incomplete); + SourceLocation L = + Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin, + Loc.getManager(), Features); + Diags->Report(FullSourceLoc(L, Loc.getManager()), + diag::err_ucn_escape_incomplete); } return false; } diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp index 332c95679b..e0eb6661d6 100644 --- a/lib/Lex/Preprocessor.cpp +++ b/lib/Lex/Preprocessor.cpp @@ -429,68 +429,6 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok, } -/// AdvanceToTokenCharacter - Given a location that specifies the start of a -/// token, return a new location that specifies a character within the token. -FullSourceLoc Preprocessor::AdvanceToTokenCharacter(FullSourceLoc TokStart, - unsigned CharNo, - const LangOptions &Features) { - // Figure out how many physical characters away the specified instantiation - // character is. This needs to take into consideration newlines and - // trigraphs. - bool Invalid = false; - const char *TokPtr = TokStart.getCharacterData(&Invalid); - - // If they request the first char of the token, we're trivially done. - if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr))) - return TokStart; - - unsigned PhysOffset = 0; - - // The usual case is that tokens don't contain anything interesting. Skip - // over the uninteresting characters. 
If a token only consists of simple - // chars, this method is extremely fast. - while (Lexer::isObviouslySimpleCharacter(*TokPtr)) { - if (CharNo == 0) - return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset), - TokStart.getManager()); - ++TokPtr, --CharNo, ++PhysOffset; - } - - // If we have a character that may be a trigraph or escaped newline, use a - // lexer to parse it correctly. - for (; CharNo; --CharNo) { - unsigned Size; - Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features); - TokPtr += Size; - PhysOffset += Size; - } - - // Final detail: if we end up on an escaped newline, we want to return the - // location of the actual byte of the token. For example foo\<newline>bar - // advanced by 3 should return the location of b, not of \\. One compounding - // detail of this is that the escape may be made by a trigraph. - if (!Lexer::isObviouslySimpleCharacter(*TokPtr)) - PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr; - - return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset), - TokStart.getManager()); -} - -SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc, - unsigned Offset) { - if (Loc.isInvalid() || !Loc.isFileID()) - return SourceLocation(); - - unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features); - if (Len > Offset) - Len = Len - Offset; - else - return Loc; - - return AdvanceToTokenCharacter(Loc, Len); -} - - //===----------------------------------------------------------------------===// // Preprocessor Initialization Methods