This change refactors some of the low-level lexer interfaces a bit.

author Chris Lattner <sabre@nondot.org>

Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)

committer Chris Lattner <sabre@nondot.org>

Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)
author Chris Lattner <sabre@nondot.org>
Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)
committer Chris Lattner <sabre@nondot.org>
Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)
diff --git a/Driver/CacheTokens.cpp b/Driver/CacheTokens.cpp

index 1d33105668fccf7856d77f3563e017beaaab358d..fcb4d2f12db117bf1da46fb76ceae36ff1d75630 100644 (file)
--- a/Driver/CacheTokens.cpp
+++ b/Driver/CacheTokens.cpp
@@ -140,32 +140,19 @@ void PTHWriter::EmitToken(const Token& T) {
    Emit32(ResolveID(T.getIdentifierInfo()));
    Emit32(fpos);
    
-  // For specific tokens we cache their spelling.
-  if (T.getIdentifierInfo())
-    return;
-
-  switch (T.getKind()) {
-    default:
-      break;
-    case tok::string_literal:     
-    case tok::wide_string_literal:
-    case tok::angle_string_literal:
-    case tok::numeric_constant:
-    case tok::char_constant: {
-      // FIXME: This uses the slow getSpelling().  Perhaps we do better
-      // in the future?  This only slows down PTH generation.
-      const std::string& spelling = PP.getSpelling(T);
-      const char* s = spelling.c_str();
-      
-      // Get the string entry.
-      llvm::StringMapEntry<Offset> *E =
-        &CachedStrs.GetOrCreateValue(s, s+spelling.size());
-
-      // Store the address of the string entry in our spelling map.
-      (*CurSpellMap).push_back(std::make_pair(fpos, E));
+  // Literals (strings, numbers, characters) get cached spellings.
+  if (T.isLiteral()) {
+    // FIXME: This uses the slow getSpelling().  Perhaps we do better
+    // in the future?  This only slows down PTH generation.
+    const std::string &spelling = PP.getSpelling(T);
+    const char* s = spelling.c_str();
+    
+    // Get the string entry.
+    llvm::StringMapEntry<Offset> *E =
+      &CachedStrs.GetOrCreateValue(s, s+spelling.size());
  
-      break;
-    }
+    // Store the address of the string entry in our spelling map.
+    CurSpellMap->push_back(std::make_pair(fpos, E));
    }
  }
  
diff --git a/Driver/PrintPreprocessedOutput.cpp b/Driver/PrintPreprocessedOutput.cpp

index 64cc4c8be65bdcbaba8f39ac99f23cf8f69334c8..deecacb1f4c5e3f3cb27e222ea79b0eee0307b4b 100644 (file)
--- a/Driver/PrintPreprocessedOutput.cpp
+++ b/Driver/PrintPreprocessedOutput.cpp
@@ -430,6 +430,7 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
      // Avoid spelling identifiers, the most common form of token.
      FirstChar = II->getName()[0];
    } else if (!Tok.needsCleaning()) {
+    // FIXME: SPEED UP LITERALS!
      SourceManager &SrcMgr = PP.getSourceManager();
      FirstChar =
        *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
@@ -556,6 +557,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP,
        const char *Str = II->getName();
        unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
        OS.write(Str, Len);
+      // FIXME: ACCELERATE LITERALS
      } else if (Tok.getLength() < 256) {
        const char *TokPtr = Buffer;
        unsigned Len = PP.getSpelling(Tok, TokPtr);
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h

index bf47bab29529fecb865713bb845a0be58c505722..f6391d9781a25eb7c3006f52cceb1a0f4593d9ac 100644 (file)
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -415,7 +415,7 @@ public:
    /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
    /// invoked.
    void AnnotateCachedTokens(const Token &Tok) {
-    assert(Tok.isAnnotationToken() && "Expected annotation token");
+    assert(Tok.isAnnotation() && "Expected annotation token");
      if (CachedLexPos != 0 && isBacktrackEnabled())
        AnnotatePreviousCachedTokens(Tok);
    }
@@ -463,11 +463,11 @@ public:
      return *SourceMgr.getCharacterData(SL);
    }
    
-  /// CreateString - Plop the specified string into a scratch buffer and return
-  /// a location for it.  If specified, the source location provides a source
-  /// location for the token.
-  SourceLocation CreateString(const char *Buf, unsigned Len,
-                              SourceLocation SourceLoc = SourceLocation());
+  /// CreateString - Plop the specified string into a scratch buffer and set the
+  /// specified token's location and length to it.  If specified, the source
+  /// location provides a location of the instantiation point of the token.
+  void CreateString(const char *Buf, unsigned Len,
+                    Token &Tok, SourceLocation SourceLoc = SourceLocation());
    
    /// DumpToken - Print the token to stderr, used for debugging.
    ///
diff --git a/include/clang/Lex/ScratchBuffer.h b/include/clang/Lex/ScratchBuffer.h

index eec6a251a560fc8700819e0ca83192c82915f507..6506f9262947cc3a06fdbc38b752249882784dae 100644 (file)
--- a/include/clang/Lex/ScratchBuffer.h
+++ b/include/clang/Lex/ScratchBuffer.h
@@ -30,17 +30,11 @@ class ScratchBuffer {
  public:
    ScratchBuffer(SourceManager &SM);
    
-  /// getToken - Splat the specified text into a temporary MemoryBuffer and
-  /// return a SourceLocation that refers to the token.  The SourceLoc value
-  /// gives a virtual location that the token will appear to be from.
-  SourceLocation getToken(const char *Buf, unsigned Len,
-                          SourceLocation SourceLoc);
-  
    /// getToken - Splat the specified text into a temporary MemoryBuffer and
    /// return a SourceLocation that refers to the token.  This is just like the
    /// previous method, but returns a location that indicates the physloc of the
    /// token.
-  SourceLocation getToken(const char *Buf, unsigned Len);
+  SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr);
                            
  private:
    void AllocScratchBuffer(unsigned RequestLen);
diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h

index c0dd2b7837cd967e58d6e21b3e1d92ae6db88d0b..73e087ecb6d949cb5c2108d5c996a56cfcd2181e 100644 (file)
--- a/include/clang/Lex/Token.h
+++ b/include/clang/Lex/Token.h
@@ -34,21 +34,28 @@ class Token {
    /// The location of the token.
    SourceLocation Loc;
  
-  // Conceptually these next two fields could be in a union with
-  // access depending on isAnnotationToken(). However, this causes gcc
-  // 4.2 to pessimize LexTokenInternal, a very performance critical
-  // routine. Keeping as separate members with casts until a more
-  // beautiful fix presents itself.
+  // Conceptually these next two fields could be in a union.  However, this
+  // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
+  // routine. Keeping as separate members with casts until a more beautiful fix
+  // presents itself.
  
    /// UintData - This holds either the length of the token text, when
    /// a normal token, or the end of the SourceRange when an annotation
    /// token.
    unsigned UintData;
  
-  /// PtrData - For normal tokens, this points to the uniqued
-  /// information for the identifier (if an identifier token) or
-  /// null. For annotation tokens, this points to information specific
-  /// to the annotation token.
+  /// PtrData - This is a union of four different pointer types, which depends
+  /// on what type of token this is:
+  ///  Identifiers, keywords, etc:
+  ///    This is an IdentifierInfo*, which contains the uniqued identifier
+  ///    spelling.
+  ///  Literals:  isLiteral() returns true.
+  ///    This is a pointer to the start of the token in a text buffer, which
+  ///    may be dirty (have trigraphs / escaped newlines).
+  ///  Annotations (resolved type names, C++ scopes, etc): isAnnotation().
+  ///    This is a pointer to sema-specific data for the annotation token.
+  ///  Other:
+  ///    This is null.
    void *PtrData;
  
    /// Kind - The actual flavor of token this is.
@@ -77,32 +84,40 @@ public:
    bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
    bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
  
-  bool isAnnotationToken() const { 
+  /// isLiteral - Return true if this is a "literal", like a numeric
+  /// constant, string, etc.
+  bool isLiteral() const {
+    return is(tok::numeric_constant) || is(tok::char_constant) ||
+           is(tok::string_literal) || is(tok::wide_string_literal) ||
+           is(tok::angle_string_literal);
+  }
+
+  bool isAnnotation() const { 
      return is(tok::annot_typename) || 
             is(tok::annot_cxxscope) ||
             is(tok::annot_template_id);
    }
-
+  
    /// getLocation - Return a source location identifier for the specified
    /// offset in the current file.
    SourceLocation getLocation() const { return Loc; }
    unsigned getLength() const {
-    assert(!isAnnotationToken() && "Annotation tokens have no length field");
+    assert(!isAnnotation() && "Annotation tokens have no length field");
      return UintData;
    }
  
    void setLocation(SourceLocation L) { Loc = L; }
    void setLength(unsigned Len) {
-    assert(!isAnnotationToken() && "Annotation tokens have no length field");
+    assert(!isAnnotation() && "Annotation tokens have no length field");
      UintData = Len;
    }
  
    SourceLocation getAnnotationEndLoc() const {
-    assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
      return SourceLocation::getFromRawEncoding(UintData);
    }
    void setAnnotationEndLoc(SourceLocation L) {
-    assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token");
+    assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
      UintData = L.getRawEncoding();
    }
  
@@ -130,19 +145,32 @@ public:
    }
    
    IdentifierInfo *getIdentifierInfo() const {
-    assert(!isAnnotationToken() && "Used IdentInfo on annotation token");
+    assert(!isAnnotation() && "Used IdentInfo on annotation token!");
+    if (isLiteral()) return 0;
      return (IdentifierInfo*) PtrData;
    }
    void setIdentifierInfo(IdentifierInfo *II) {
      PtrData = (void*) II;
    }
-
+  
+  /// getLiteralData - For a literal token (numeric constant, string, etc), this
+  /// returns a pointer to the start of it in the text buffer if known, null
+  /// otherwise.
+  const char *getLiteralData() const {
+    assert(isLiteral() && "Cannot get literal data of non-literal");
+    return reinterpret_cast<const char*>(PtrData);
+  }
+  void setLiteralData(const char *Ptr) {
+    assert(isLiteral() && "Cannot set literal data of non-literal");
+    PtrData = (void*)Ptr;
+  }
+  
    void *getAnnotationValue() const {
-    assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
      return PtrData;
    }
    void setAnnotationValue(void *val) {
-    assert(isAnnotationToken() && "Used AnnotVal on non-annotation token");
+    assert(isAnnotation() && "Used AnnotVal on non-annotation token");
      PtrData = val;
    }
    
diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h

index 191613bbcbc80a6b7c51d3ec531a9d8e327b4ef6..df8646df796eaede28e13ecbf93cd563f4eeaaf6 100644 (file)
--- a/include/clang/Parse/Parser.h
+++ b/include/clang/Parse/Parser.h
@@ -690,10 +690,11 @@ private:
      if (!Tok.is(tok::identifier))
        return false;
      
-    if (Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope))
+    IdentifierInfo *II = Tok.getIdentifierInfo();
+    if (Actions.isTypeName(*II, CurScope))
        return true;
      
-    return Tok.getIdentifierInfo() == Ident_super;
+    return II == Ident_super;
    }
  
    OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp

index 9e8d1aa74076e42b8b36d1a0f43dbf82d75c1b37..03d81b3b9a16afc37edffd18c27649b5ca03bd7f 100644 (file)
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -624,7 +624,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
      return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
    
    // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
+  Result.setLiteralData(TokStart);
  }
  
  /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
@@ -655,8 +657,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
      Diag(NulCharacter, diag::null_in_string);
  
    // Update the location of the token as well as the BufferPtr instance var.
+  const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr,
                       Wide ? tok::wide_string_literal : tok::string_literal);
+  Result.setLiteralData(TokStart);
  }
  
  /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@@ -687,7 +691,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
      Diag(NulCharacter, diag::null_in_string);
    
    // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
+  Result.setLiteralData(TokStart);
  }
  
  
@@ -735,7 +741,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
      Diag(NulCharacter, diag::null_in_char);
  
    // Update the location of token as well as BufferPtr.
+  const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::char_constant);
+  Result.setLiteralData(TokStart);
  }
  
  /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -901,9 +909,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
    Spelling += "*/";    // add suffix.
    
    Result.setKind(tok::comment);
-  Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(),
-                                      Result.getLocation()));
-  Result.setLength(Spelling.size());
+  PP->CreateString(&Spelling[0], Spelling.size(), Result,
+                   Result.getLocation());
    return true;
  }
  
diff --git a/lib/Lex/MacroArgs.cpp b/lib/Lex/MacroArgs.cpp

index 77c96e0ab33fb5251a440aea04868cb93dd2a235..c3d7738afe0df5fc917340fe541b6e80c9058dd8 100644 (file)
--- a/lib/Lex/MacroArgs.cpp
+++ b/lib/Lex/MacroArgs.cpp
@@ -225,8 +225,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
      }
    }
    
-  Tok.setLength(Result.size());
-  Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
+  PP.CreateString(&Result[0], Result.size(), Tok);
    return Tok;
  }
  
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp

index ed67754e6e7d8f86f27fbcd29e849fb1e0159159..53aa09c130409cffa38f6f236342b8ca3a1afa8e 100644 (file)
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -89,7 +89,7 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
  }
  
  void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
-  assert(Tok.isAnnotationToken() && "Expected annotation token");
+  assert(Tok.isAnnotation() && "Expected annotation token");
    assert(CachedLexPos != 0 && "Expected to have some cached tokens");
    assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc()
           && "The annotation should be until the most recent cached token");
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp

index b14df735ad1c408b3762b6211933ea966dcf6d52..b3e921c36894aa905b3a0389045ff409f57909d5 100644 (file)
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -429,10 +429,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
    char TmpBuffer[100];
    sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, 
            TM->tm_year+1900);
-  DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  
+  Token TmpTok;
+  TmpTok.startToken();
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  DATELoc = TmpTok.getLocation();
  
    sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
-  TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
+  PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
+  TIMELoc = TmpTok.getLocation();
  }
  
  /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
@@ -463,8 +468,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
              SourceMgr.getInstantiationLineNumber(Tok.getLocation()));
      unsigned Length = strlen(TmpBuffer)-1;
      Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Trim off space.
    } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
      SourceLocation Loc = Tok.getLocation();
      if (II == Ident__BASE_FILE__) {
@@ -480,8 +485,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
      std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc));
      FN = '"' + Lexer::Stringify(FN) + '"';
      Tok.setKind(tok::string_literal);
-    Tok.setLength(FN.size());
-    Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
+    CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
    } else if (II == Ident__DATE__) {
      if (!DATELoc.isValid())
        ComputeDATE_TIME(DATELoc, TIMELoc, *this);
@@ -511,8 +515,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
      sprintf(TmpBuffer, "%u ", Depth);
      unsigned Length = strlen(TmpBuffer)-1;
      Tok.setKind(tok::numeric_constant);
-    Tok.setLength(Length);
-    Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation()));
+    CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
+    Tok.setLength(Length);  // Length already excludes the trailing space.
    } else if (II == Ident__TIMESTAMP__) {
      // MSVC, ICC, GCC, VisualAge C++ extension.  The generated string should be
      // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
@@ -540,8 +544,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
      unsigned Len = strlen(TmpBuffer);
      TmpBuffer[Len-1] = '"';  // Replace the newline with a quote.
      Tok.setKind(tok::string_literal);
-    Tok.setLength(Len);
-    Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation()));
+    CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
+    Tok.setLength(Len);  // Trim off the trailing null.
    } else {
      assert(0 && "Unknown identifier!");
    }
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp

index 922af09e5006bfce958365f0318bc2a75a4fa55d..a80ba6bd1f1f03dea554c6aa80e03ec00d813b65 100644 (file)
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -156,7 +156,10 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
    
    // Plop the string (including the newline and trailing null) into a buffer
    // where we can lex it.
-  SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size());
+  Token TmpTok;
+  TmpTok.startToken();
+  CreateString(&StrVal[0], StrVal.size(), TmpTok);
+  SourceLocation TokLoc = TmpTok.getLocation();
  
    // Make and enter a lexer object so that we lex and expand the tokens just
    // like any others.
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp

index d0a15e45c4146c476009b860af896051307a2eaa..cb0c850e7ead302ade47cab936acc939a256ffbf 100644 (file)
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -266,13 +266,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
    }
  
    // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation());
+  const char *TokStart = 0;
+  
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+  
+  if (TokStart == 0)
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation());
  
    // If this token contains nothing interesting, return it directly.
    if (!Tok.needsCleaning()) {
      Buffer = TokStart;
      return Tok.getLength();
    }
+  
    // Otherwise, hard case, relex the characters into the string.
    char *OutBuf = const_cast<char*>(Buffer);
    for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@@ -291,11 +298,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
  /// CreateString - Plop the specified string into a scratch buffer and return a
  /// location for it.  If specified, the source location provides a source
  /// location for the token.
-SourceLocation Preprocessor::
-CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) {
-  if (SLoc.isValid())
-    return ScratchBuf->getToken(Buf, Len, SLoc);
-  return ScratchBuf->getToken(Buf, Len);
+void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
+                                SourceLocation InstantiationLoc) {
+  Tok.setLength(Len);
+  
+  const char *DestPtr;
+  SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
+  
+  if (InstantiationLoc.isValid())
+    Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len);
+  Tok.setLocation(Loc);
+  
+  // If this is a literal token, set the pointer data.
+  if (Tok.isLiteral())
+    Tok.setLiteralData(DestPtr);
  }
  
  
diff --git a/lib/Lex/ScratchBuffer.cpp b/lib/Lex/ScratchBuffer.cpp

index 695a5365faf7f4eb2ab8150411e0952bece19145..9253bc0944dfa7bf7c9b9d6c30d1c73295ceeed4 100644 (file)
--- a/lib/Lex/ScratchBuffer.cpp
+++ b/lib/Lex/ScratchBuffer.cpp
@@ -30,10 +30,14 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
  /// return a SourceLocation that refers to the token.  This is just like the
  /// method below, but returns a location that indicates the physloc of the
  /// token.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
+SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
+                                       const char *&DestPtr) {
    if (BytesUsed+Len > ScratchBufSize)
      AllocScratchBuffer(Len);
    
+  // Return a pointer to the character data.
+  DestPtr = CurBuffer+BytesUsed;
+  
    // Copy the token data into the buffer.
    memcpy(CurBuffer+BytesUsed, Buf, Len);
  
@@ -43,16 +47,6 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
    return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
  }
  
-
-/// getToken - Splat the specified text into a temporary MemoryBuffer and
-/// return a SourceLocation that refers to the token.  The SourceLoc value
-/// gives a virtual location that the token will appear to be from.
-SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
-                                       SourceLocation SourceLoc) {
-  // Map the physloc to the specified sourceloc.
-  return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
-}
-
  void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
    // Only pay attention to the requested length if it is larger than our default
    // page size.  If it is, we allocate an entire chunk for it.  This is to
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp

index 3707ef1ed6f02b590193bc84bfc0e03766806247..7ae61beb8882d322f3a4401d3e51eda0b52b226e 100644 (file)
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -352,6 +352,7 @@ void TokenLexer::Lex(Token &Tok) {
  /// If this returns true, the caller should immediately return the token.
  bool TokenLexer::PasteTokens(Token &Tok) {
    llvm::SmallVector<char, 128> Buffer;
+  const char *ResultTokStrPtr = 0;
    do {
      // Consume the ## operator.
      SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
@@ -386,8 +387,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
      
      // Plop the pasted result (including the trailing newline and null) into a
      // scratch buffer where we can lex it.
-    SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
+    Token ResultTokTmp;
+    ResultTokTmp.startToken();
      
+    // Claim that the tmp token is a string_literal so that we can get the
+    // character pointer back from CreateString.
+    ResultTokTmp.setKind(tok::string_literal);
+    PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
+    SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
+    ResultTokStrPtr = ResultTokTmp.getLiteralData();
+
      // Lex the resultant pasted token into Result.
      Token Result;
      
@@ -405,20 +414,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
        assert(ResultTokLoc.isFileID() &&
               "Should be a raw location into scratch buffer");
        SourceManager &SourceMgr = PP.getSourceManager();
-      std::pair<FileID, unsigned> LocInfo =
-        SourceMgr.getDecomposedLoc(ResultTokLoc);
+      FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
        
-      const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first;
+      const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
        
        // Make a lexer to lex this string from.  Lex just this one token.
-      const char *ResultStrData = ScratchBufStart+LocInfo.second;
-      
        // Make a lexer object so that we lex and expand the paste result.
-      Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first),
-               PP.getLangOptions(), 
-               ScratchBufStart,
-               ResultStrData, 
-               ResultStrData+LHSLen+RHSLen /*don't include null*/);
+      Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
+               PP.getLangOptions(), ScratchBufStart,
+               ResultTokStrPtr, 
+               ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
        
        // Lex a token in raw mode.  This way it won't look up identifiers
        // automatically, lexing off the end will return an eof token, and
@@ -442,12 +447,12 @@ bool TokenLexer::PasteTokens(Token &Tok) {
            RHS.is(tok::slash)) {
          HandleMicrosoftCommentPaste(Tok);
          return true;
-      } else {
-        // TODO: If not in assembler language mode.
-        PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
-          << std::string(Buffer.begin(), Buffer.end()-1);
-        return false;
        }
+      
+      // TODO: If not in assembler language mode.
+      PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
+        << std::string(Buffer.begin(), Buffer.end()-1);
+      return false;
      }
      
      // Turn ## into 'unknown' to avoid # ## # from looking like a paste
@@ -471,7 +476,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
    if (Tok.is(tok::identifier)) {
      // Look up the identifier info for the token.  We disabled identifier lookup
      // by saying we're skipping contents, so we need to do this manually.
-    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
+    Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr));
    }
    return false;
  }
diff --git a/lib/Rewrite/TokenRewriter.cpp b/lib/Rewrite/TokenRewriter.cpp

index aab6fb0cce8bcea14100fc1baffb39064f7a371c..e17e80133b11d6cdecee48d229bdcfdf11c5e671 100644 (file)
--- a/lib/Rewrite/TokenRewriter.cpp
+++ b/lib/Rewrite/TokenRewriter.cpp
@@ -78,14 +78,15 @@ TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
    
  
  TokenRewriter::token_iterator
-TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){
+TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
    unsigned Len = strlen(Val);
    
    // Plop the string into the scratch buffer, then create a token for this
    // string.
    Token Tok;
    Tok.startToken();
-  Tok.setLocation(ScratchBuf->getToken(Val, Len));
+  const char *Spelling;
+  Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
    Tok.setLength(Len);
    
    // TODO: Form a whole lexer around this and relex the token!  For now, just
author	Chris Lattner <sabre@nondot.org>
	Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)
committer	Chris Lattner <sabre@nondot.org>
	Mon, 26 Jan 2009 19:29:26 +0000 (19:29 +0000)
Driver/CacheTokens.cpp		patch \| blob \| history
Driver/PrintPreprocessedOutput.cpp		patch \| blob \| history
include/clang/Lex/Preprocessor.h		patch \| blob \| history
include/clang/Lex/ScratchBuffer.h		patch \| blob \| history
include/clang/Lex/Token.h		patch \| blob \| history
include/clang/Parse/Parser.h		patch \| blob \| history
lib/Lex/Lexer.cpp		patch \| blob \| history
lib/Lex/MacroArgs.cpp		patch \| blob \| history
lib/Lex/PPCaching.cpp		patch \| blob \| history
lib/Lex/PPMacroExpansion.cpp		patch \| blob \| history
lib/Lex/Pragma.cpp		patch \| blob \| history
lib/Lex/Preprocessor.cpp		patch \| blob \| history
lib/Lex/ScratchBuffer.cpp		patch \| blob \| history
lib/Lex/TokenLexer.cpp		patch \| blob \| history
lib/Rewrite/TokenRewriter.cpp		patch \| blob \| history