From: Chris Lattner <sabre@nondot.org>
Date: Wed, 17 Oct 2007 21:18:47 +0000 (+0000)
Subject: Move token length calculation out of the diagnostics machinery into
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9a6119437672f42be5f50c3fe89fe843b1bfa5b5;p=clang

Move token length calculation out of the diagnostics machinery into
the lexer, where it can be shared.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43090 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/Driver/TextDiagnosticPrinter.cpp b/Driver/TextDiagnosticPrinter.cpp
index 76809d7dea..81c73fd533 100644
--- a/Driver/TextDiagnosticPrinter.cpp
+++ b/Driver/TextDiagnosticPrinter.cpp
@@ -80,7 +80,7 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R,
       --EndColNo;  // Zero base the col #.
       
       // Add in the length of the token, so that we cover multi-char tokens.
-      EndColNo += GetTokenLength(R.getEnd());
+      EndColNo += Lexer::MeasureTokenLength(R.getEnd(), SourceMgr);
     } else {
       EndColNo = CaratLine.size();
     }
@@ -97,31 +97,6 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R,
     CaratLine[i] = '~';
 }
 
-/// GetTokenLength - Given the source location of a token, determine its length.
-/// This is a fully general function that uses a lexer to relex the token.
-unsigned TextDiagnosticPrinter::GetTokenLength(SourceLocation Loc) {
-  // If this comes from a macro expansion, we really do want the macro name, not
-  // the token this macro expanded to.
-  Loc = SourceMgr.getLogicalLoc(Loc);
-  const char *StrData = SourceMgr.getCharacterData(Loc);
-  const char *BufEnd = SourceMgr.getBufferData(Loc.getFileID()).second;
-  
-  // TODO: this could be special cased for common tokens like identifiers, ')',
-  // etc to make this faster, if it mattered.  This could use 
-  // Lexer::isObviouslySimpleCharacter for example.
-  
-  // Create a langops struct and enable trigraphs.  This is sufficient for
-  // measuring tokens.
-  LangOptions LangOpts;
-  LangOpts.Trigraphs = true;
-  
-  // Create a lexer starting at the beginning of this token.
-  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
-  Token TheTok;
-  TheLexer.LexRawToken(TheTok);
-  return TheTok.getLength();
-}
-
 void TextDiagnosticPrinter::HandleDiagnostic(Diagnostic::Level Level, 
                                              SourceLocation Pos,
                                              diag::kind ID,
diff --git a/Driver/TextDiagnosticPrinter.h b/Driver/TextDiagnosticPrinter.h
index 71e584ebf4..3df38c9d42 100644
--- a/Driver/TextDiagnosticPrinter.h
+++ b/Driver/TextDiagnosticPrinter.h
@@ -31,7 +31,6 @@ public:
   void HighlightRange(const SourceRange &R, unsigned LineNo,
                       std::string &CaratLine,
                       const std::string &SourceLine);
-  unsigned GetTokenLength(SourceLocation Loc);
 
   virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
                                 SourceLocation Pos,
diff --git a/Lex/Lexer.cpp b/Lex/Lexer.cpp
index c7f54ca8ff..19dcfe2f0c 100644
--- a/Lex/Lexer.cpp
+++ b/Lex/Lexer.cpp
@@ -163,6 +163,39 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
 }
 
 
+/// MeasureTokenLength - Relex the token at the specified location and return
+/// its length in bytes in the input file.  If the token needs cleaning (e.g.
+/// it contains a trigraph or an escaped newline), the returned count includes
+/// the bytes of those sequences as well.
+unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
+                                   const SourceManager &SM) {
+  // If this comes from a macro expansion, measure the macro name at the logical
+  // location rather than the token the macro expanded to.
+  Loc = SM.getLogicalLoc(Loc);
+  
+  const char *StrData = SM.getCharacterData(Loc);
+  
+  // TODO: this could be special cased for common tokens like identifiers, ')',
+  // etc. to make this faster, if it mattered.  Looking at StrData[0] would be
+  // enough to handle all obviously single-char tokens, and a helper such as
+  // Lexer::isObviouslySimpleCharacter could be used to fast-path identifiers
+  // and similar simple tokens.
+  
+  
+  const char *BufEnd = SM.getBufferData(Loc.getFileID()).second;
+  
+  // Default-construct the language options and enable trigraphs.  This is
+  // sufficient for measuring tokens.
+  LangOptions LangOpts;
+  LangOpts.Trigraphs = true;
+  
+  // Create a lexer at the start of this token and relex it as a raw token.
+  Lexer TheLexer(Loc, LangOpts, StrData, BufEnd);
+  Token TheTok;
+  TheLexer.LexRawToken(TheTok);
+  return TheTok.getLength();
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 5954150c09..eda50382ad 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -24,6 +24,7 @@
 
 namespace clang {
 class Diagnostic;
+class SourceManager;
 class Preprocessor;
 
 /// Lexer - This provides a simple interface that turns a text buffer into a
@@ -178,6 +179,13 @@ public:
   /// and " characters.  This does not add surrounding ""'s to the string.
   static void Stringify(llvm::SmallVectorImpl<char> &Str);
   
+  /// MeasureTokenLength - Relex the token at the specified location and return
+  /// its length in bytes in the input file.  If the token needs cleaning (e.g.
+  /// it contains a trigraph or an escaped newline), the returned count includes
+  /// the bytes of those sequences as well.
+  static unsigned MeasureTokenLength(SourceLocation Loc,
+                                     const SourceManager &SM);
+  
   //===--------------------------------------------------------------------===//
   // Internal implementation interfaces.
 private: