From: Chris Lattner <sabre@nondot.org>
Date: Wed, 17 Nov 2010 07:05:50 +0000 (+0000)
Subject: move AdvanceToTokenCharacter and getLocForEndOfToken from
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7ef5c27eb6e8ebe58b52013246c06753c3613263;p=clang

move AdvanceToTokenCharacter and getLocForEndOfToken from
Preprocessor to Lexer where they make more sense.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119474 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/docs/InternalsManual.html b/docs/InternalsManual.html
index cf89c190f9..0720329cbd 100644
--- a/docs/InternalsManual.html
+++ b/docs/InternalsManual.html
@@ -565,7 +565,7 @@ x = foo + bar;
 representation, the 'last' location needs to be adjusted to point to
 (or past) the end of that token with either
 <code>Lexer::MeasureTokenLength()</code> or
-<code>Preprocessor::getLocForEndOfToken()</code>. For the rare cases
+<code>Lexer::getLocForEndOfToken()</code>. For the rare cases
 where character-level source ranges information is needed we use
 the <code>CharSourceRange</code> class.</p>
 
diff --git a/include/clang/Basic/SourceLocation.h b/include/clang/Basic/SourceLocation.h
index 35f27fbebd..da2ec15e3f 100644
--- a/include/clang/Basic/SourceLocation.h
+++ b/include/clang/Basic/SourceLocation.h
@@ -121,7 +121,6 @@ public:
   /// directly.
   unsigned getRawEncoding() const { return ID; }
 
-
   /// getFromRawEncoding - Turn a raw encoding of a SourceLocation object into
   /// a real SourceLocation.
   static SourceLocation getFromRawEncoding(unsigned Encoding) {
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index 9e0fb7ee70..0237969a2d 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -228,6 +228,33 @@ public:
                                             const SourceManager &SM,
                                             const LangOptions &LangOpts);
   
+  /// AdvanceToTokenCharacter - If the current SourceLocation specifies a
+  /// location at the start of a token, return a new location that specifies a
+  /// character within the token.  This handles trigraphs and escaped newlines.
+  static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
+                                                unsigned Character,
+                                                const SourceManager &SM,
+                                                const LangOptions &Features);
+  
+  /// \brief Computes the source location just past the end of the
+  /// token at this source location.
+  ///
+  /// This routine can be used to produce a source location that
+  /// points just past the end of the token referenced by \p Loc, and
+  /// is generally used when a diagnostic needs to point just after a
+  /// token where it expected something different that it received. If
+  /// the returned source location would not be meaningful (e.g., if
+  /// it points into a macro), this routine returns an invalid
+  /// source location.
+  ///
+  /// \param Offset an offset from the end of the token, where the source
+  /// location should refer to. The default offset (0) produces a source
+  /// location pointing just past the end of the token; an offset of 1 produces
+  /// a source location pointing to the last character in the token, etc.
+  static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+                                            const SourceManager &SM,
+                                            const LangOptions &Features);
+    
   /// \brief Compute the preamble of the given file.
   ///
   /// The preamble of a file contains the initial comments, include directives,
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h
index c3e7349011..c45a75fc62 100644
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -717,7 +717,9 @@ public:
   /// location should refer to. The default offset (0) produces a source
   /// location pointing just past the end of the token; an offset of 1 produces
   /// a source location pointing to the last character in the token, etc.
-  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0);
+  SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
+    return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, Features);
+  }
 
   /// DumpToken - Print the token to stderr, used for debugging.
   ///
@@ -729,12 +731,8 @@ public:
   /// token, return a new location that specifies a character within the token.
   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
                                          unsigned Char) const {
-    return AdvanceToTokenCharacter(FullSourceLoc(TokStart, SourceMgr), Char,
-                                   Features);
+    return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, Features);
   }
-  static FullSourceLoc AdvanceToTokenCharacter(FullSourceLoc TokStart,
-                                               unsigned Char,
-                                               const LangOptions &Features);
 
 
   /// IncrementPasteCounter - Increment the counters for the number of token
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index ad3d35af6b..3e68875768 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -444,6 +444,83 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
                                : TheTok.isAtStartOfLine());
 }
 
+
+/// AdvanceToTokenCharacter - Given a location that specifies the start of a
+/// token, return a new location that specifies a character within the token.
+SourceLocation Lexer::AdvanceToTokenCharacter(SourceLocation TokStart,
+                                              unsigned CharNo,
+                                              const SourceManager &SM,
+                                              const LangOptions &Features) {
+  // Figure out how many physical characters away the specified instantiation
+  // character is.  This needs to take into consideration newlines and
+  // trigraphs.
+  bool Invalid = false;
+  const char *TokPtr = SM.getCharacterData(TokStart, &Invalid);
+  
+  // If they request the first char of the token, we're trivially done.
+  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
+    return TokStart;
+  
+  unsigned PhysOffset = 0;
+  
+  // The usual case is that tokens don't contain anything interesting.  Skip
+  // over the uninteresting characters.  If a token only consists of simple
+  // chars, this method is extremely fast.
+  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
+    if (CharNo == 0)
+      return TokStart.getFileLocWithOffset(PhysOffset);
+    ++TokPtr, --CharNo, ++PhysOffset;
+  }
+  
+  // If we have a character that may be a trigraph or escaped newline, use a
+  // lexer to parse it correctly.
+  for (; CharNo; --CharNo) {
+    unsigned Size;
+    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
+    TokPtr += Size;
+    PhysOffset += Size;
+  }
+  
+  // Final detail: if we end up on an escaped newline, we want to return the
+  // location of the actual byte of the token.  For example foo\<newline>bar
+  // advanced by 3 should return the location of b, not of \\.  One compounding
+  // detail of this is that the escape may be made by a trigraph.
+  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
+    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
+  
+  return TokStart.getFileLocWithOffset(PhysOffset);
+}
+
+/// \brief Computes the source location just past the end of the
+/// token at this source location.
+///
+/// This routine can be used to produce a source location that
+/// points just past the end of the token referenced by \p Loc, and
+/// is generally used when a diagnostic needs to point just after a
+/// token where it expected something different that it received. If
+/// the returned source location would not be meaningful (e.g., if
+/// it points into a macro), this routine returns an invalid
+/// source location.
+///
+/// \param Offset an offset from the end of the token, where the source
+/// location should refer to. The default offset (0) produces a source
+/// location pointing just past the end of the token; an offset of 1 produces
+/// a source location pointing to the last character in the token, etc.
+SourceLocation Lexer::getLocForEndOfToken(SourceLocation Loc, unsigned Offset,
+                                          const SourceManager &SM,
+                                          const LangOptions &Features) {
+  if (Loc.isInvalid() || !Loc.isFileID())
+    return SourceLocation();
+  
+  unsigned Len = Lexer::MeasureTokenLength(Loc, SM, Features);
+  if (Len > Offset)
+    Len = Len - Offset;
+  else
+    return Loc;
+  
+  return AdvanceToTokenCharacter(Loc, Len, SM, Features);
+}
+
 //===----------------------------------------------------------------------===//
 // Character information.
 //===----------------------------------------------------------------------===//
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 8ad1669647..4a23355642 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@@ -194,9 +194,11 @@ static bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
   // If we didn't consume the proper number of digits, there is a problem.
   if (UcnLenSave) {
     if (Diags) {
-      Loc = Preprocessor::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
-                                                  Features);
-      Diags->Report(Loc, diag::err_ucn_escape_incomplete);
+      SourceLocation L =
+        Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
+                                       Loc.getManager(), Features);
+      Diags->Report(FullSourceLoc(L, Loc.getManager()),
+                    diag::err_ucn_escape_incomplete);
     }
     return false;
   }
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index 332c95679b..e0eb6661d6 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -429,68 +429,6 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
 }
 
 
-/// AdvanceToTokenCharacter - Given a location that specifies the start of a
-/// token, return a new location that specifies a character within the token.
-FullSourceLoc Preprocessor::AdvanceToTokenCharacter(FullSourceLoc TokStart,
-                                                    unsigned CharNo,
-                                                  const LangOptions &Features) {
-  // Figure out how many physical characters away the specified instantiation
-  // character is.  This needs to take into consideration newlines and
-  // trigraphs.
-  bool Invalid = false;
-  const char *TokPtr = TokStart.getCharacterData(&Invalid);
-
-  // If they request the first char of the token, we're trivially done.
-  if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
-    return TokStart;
-
-  unsigned PhysOffset = 0;
-
-  // The usual case is that tokens don't contain anything interesting.  Skip
-  // over the uninteresting characters.  If a token only consists of simple
-  // chars, this method is extremely fast.
-  while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
-    if (CharNo == 0)
-      return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset),
-                           TokStart.getManager());
-    ++TokPtr, --CharNo, ++PhysOffset;
-  }
-
-  // If we have a character that may be a trigraph or escaped newline, use a
-  // lexer to parse it correctly.
-  for (; CharNo; --CharNo) {
-    unsigned Size;
-    Lexer::getCharAndSizeNoWarn(TokPtr, Size, Features);
-    TokPtr += Size;
-    PhysOffset += Size;
-  }
-
-  // Final detail: if we end up on an escaped newline, we want to return the
-  // location of the actual byte of the token.  For example foo\<newline>bar
-  // advanced by 3 should return the location of b, not of \\.  One compounding
-  // detail of this is that the escape may be made by a trigraph.
-  if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
-    PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
-
-  return FullSourceLoc(TokStart.getFileLocWithOffset(PhysOffset),
-                       TokStart.getManager());
-}
-
-SourceLocation Preprocessor::getLocForEndOfToken(SourceLocation Loc,
-                                                 unsigned Offset) {
-  if (Loc.isInvalid() || !Loc.isFileID())
-    return SourceLocation();
-
-  unsigned Len = Lexer::MeasureTokenLength(Loc, getSourceManager(), Features);
-  if (Len > Offset)
-    Len = Len - Offset;
-  else
-    return Loc;
-
-  return AdvanceToTokenCharacter(Loc, Len);
-}
-
-
 
 //===----------------------------------------------------------------------===//
 // Preprocessor Initialization Methods