From: Chris Lattner Date: Wed, 17 Oct 2007 21:18:47 +0000 (+0000) Subject: Move token length calculation out of the diagnostics machinery into X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a6119437672f42be5f50c3fe89fe843b1bfa5b5;p=clang Move token length calculation out of the diagnostics machinery into the lexer, where it can be shared. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43090 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/Driver/TextDiagnosticPrinter.cpp b/Driver/TextDiagnosticPrinter.cpp index 76809d7dea..81c73fd533 100644 --- a/Driver/TextDiagnosticPrinter.cpp +++ b/Driver/TextDiagnosticPrinter.cpp @@ -80,7 +80,7 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R, --EndColNo; // Zero base the col #. // Add in the length of the token, so that we cover multi-char tokens. - EndColNo += GetTokenLength(R.getEnd()); + EndColNo += Lexer::MeasureTokenLength(R.getEnd(), SourceMgr); } else { EndColNo = CaratLine.size(); } @@ -97,31 +97,6 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R, CaratLine[i] = '~'; } -/// GetTokenLength - Given the source location of a token, determine its length. -/// This is a fully general function that uses a lexer to relex the token. -unsigned TextDiagnosticPrinter::GetTokenLength(SourceLocation Loc) { - // If this comes from a macro expansion, we really do want the macro name, not - // the token this macro expanded to. - Loc = SourceMgr.getLogicalLoc(Loc); - const char *StrData = SourceMgr.getCharacterData(Loc); - const char *BufEnd = SourceMgr.getBufferData(Loc.getFileID()).second; - - // TODO: this could be special cased for common tokens like identifiers, ')', - // etc to make this faster, if it mattered. This could use - // Lexer::isObviouslySimpleCharacter for example. - - // Create a langops struct and enable trigraphs. This is sufficient for - // measuring tokens. 
- LangOptions LangOpts; - LangOpts.Trigraphs = true; - - // Create a lexer starting at the beginning of this token. - Lexer TheLexer(Loc, LangOpts, StrData, BufEnd); - Token TheTok; - TheLexer.LexRawToken(TheTok); - return TheTok.getLength(); -} - void TextDiagnosticPrinter::HandleDiagnostic(Diagnostic::Level Level, SourceLocation Pos, diag::kind ID, diff --git a/Driver/TextDiagnosticPrinter.h b/Driver/TextDiagnosticPrinter.h index 71e584ebf4..3df38c9d42 100644 --- a/Driver/TextDiagnosticPrinter.h +++ b/Driver/TextDiagnosticPrinter.h @@ -31,7 +31,6 @@ public: void HighlightRange(const SourceRange &R, unsigned LineNo, std::string &CaratLine, const std::string &SourceLine); - unsigned GetTokenLength(SourceLocation Loc); virtual void HandleDiagnostic(Diagnostic::Level DiagLevel, SourceLocation Pos, diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp index c7f54ca8ff..19dcfe2f0c 100644 --- a/Lex/Lexer.cpp +++ b/Lex/Lexer.cpp @@ -163,6 +163,39 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) { } +/// MeasureTokenLength - Relex the token at the specified location and return +/// its length in bytes in the input file. If the token needs cleaning (e.g. +/// includes a trigraph or an escaped newline) then this count includes bytes +/// that are part of that. +unsigned Lexer::MeasureTokenLength(SourceLocation Loc, + const SourceManager &SM) { + // If this comes from a macro expansion, we really do want the macro name, not + // the token this macro expanded to. + Loc = SM.getLogicalLoc(Loc); + + const char *StrData = SM.getCharacterData(Loc); + + // TODO: this could be special cased for common tokens like identifiers, ')', + // etc to make this faster, if it mattered. Just look at StrData[0] to handle + // all obviously single-char tokens. This could use + // Lexer::isObviouslySimpleCharacter for example to handle identifiers or + // something. + + + const char *BufEnd = SM.getBufferData(Loc.getFileID()).second; + + // Create a langops struct and enable trigraphs. 
This is sufficient for + // measuring tokens. + LangOptions LangOpts; + LangOpts.Trigraphs = true; + + // Create a lexer starting at the beginning of this token. + Lexer TheLexer(Loc, LangOpts, StrData, BufEnd); + Token TheTok; + TheLexer.LexRawToken(TheTok); + return TheTok.getLength(); +} + //===----------------------------------------------------------------------===// // Character information. //===----------------------------------------------------------------------===// diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 5954150c09..eda50382ad 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -24,6 +24,7 @@ namespace clang { class Diagnostic; +class SourceManager; class Preprocessor; /// Lexer - This provides a simple interface that turns a text buffer into a @@ -178,6 +179,13 @@ public: /// and " characters. This does not add surrounding ""'s to the string. static void Stringify(llvm::SmallVectorImpl<char> &Str); + /// MeasureTokenLength - Relex the token at the specified location and return + /// its length in bytes in the input file. If the token needs cleaning (e.g. + /// includes a trigraph or an escaped newline) then this count includes bytes + /// that are part of that. + static unsigned MeasureTokenLength(SourceLocation Loc, + const SourceManager &SM); + //===--------------------------------------------------------------------===// // Internal implementation interfaces. private: