Make Preprocessor::Lex non-recursive.

author Eli Friedman <eli.friedman@gmail.com>

Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)

committer Eli Friedman <eli.friedman@gmail.com>

Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)
author Eli Friedman <eli.friedman@gmail.com>
Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)
committer Eli Friedman <eli.friedman@gmail.com>
Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h

index 9b386e100bf220f8d5b8706067de4ede579b1c9b..f456fa9cd51ca44b7d1064442c599cceb22c34a1 100644 (file)
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -80,6 +80,12 @@ class Lexer : public PreprocessorLexer {
    // line" flag set on it.
    bool IsAtStartOfLine;
  
+  bool IsAtPhysicalStartOfLine;
+
+  bool HasLeadingSpace;
+
+  bool HasLeadingEmptyMacro;
+
    // CurrentConflictMarkerState - The kind of conflict marker we are handling.
    ConflictMarkerKind CurrentConflictMarkerState;
  
@@ -127,31 +133,21 @@ public:
    /// from.  Currently this is only used by _Pragma handling.
    SourceLocation getFileLoc() const { return FileLoc; }
  
+private:
    /// Lex - Return the next token in the file.  If this is the end of file, it
    /// returns the tok::eof token.  This implicitly involves the preprocessor.
-  void Lex(Token &Result) {
-    // Start a new token.
-    Result.startToken();
-
-    // NOTE, any changes here should also change code after calls to
-    // Preprocessor::HandleDirective
-    if (IsAtStartOfLine) {
-      Result.setFlag(Token::StartOfLine);
-      IsAtStartOfLine = false;
-    }
-
-    // Get a token.  Note that this may delete the current lexer if the end of
-    // file is reached.
-    LexTokenInternal(Result);
-  }
+  bool Lex(Token &Result);
  
+public:
    /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
    bool isPragmaLexer() const { return Is_PragmaLexer; }
  
+private:
    /// IndirectLex - An indirect call to 'Lex' that can be invoked via
    ///  the PreprocessorLexer interface.
    void IndirectLex(Token &Result) { Lex(Result); }
  
+public:
    /// LexFromRawLexer - Lex a token from a designated raw lexer (one with no
    /// associated preprocessor object).  Return true if the 'next character to
    /// read' pointer points at the end of the lexer buffer, false otherwise.
@@ -447,12 +443,14 @@ private:
    /// LexTokenInternal - Internal interface to lex a preprocessing token. Called
    /// by Lex.
    ///
-  void LexTokenInternal(Token &Result);
+  bool LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine);
+
+  bool CheckUnicodeWhitespace(Token &Result, uint32_t C, const char *CurPtr);
  
    /// Given that a token begins with the Unicode character \p C, figure out
    /// what kind of token it is and dispatch to the appropriate lexing helper
    /// function.
-  void LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
+  bool LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
  
    /// FormTokenWithChars - When we lex a token, we have identified a span
    /// starting at BufferPtr, going to TokEnd that forms the token.  This method
@@ -570,24 +568,28 @@ private:
  
    void SkipBytes(unsigned Bytes, bool StartOfLine);
  
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
+
    const char *LexUDSuffix(Token &Result, const char *CurPtr,
                            bool IsStringLiteral);
  
    // Helper functions to lex the remainder of a token of the specific type.
-  void LexIdentifier         (Token &Result, const char *CurPtr);
-  void LexNumericConstant    (Token &Result, const char *CurPtr);
-  void LexStringLiteral      (Token &Result, const char *CurPtr,
+  bool LexIdentifier         (Token &Result, const char *CurPtr);
+  bool LexNumericConstant    (Token &Result, const char *CurPtr);
+  bool LexStringLiteral      (Token &Result, const char *CurPtr,
                                tok::TokenKind Kind);
-  void LexRawStringLiteral   (Token &Result, const char *CurPtr,
+  bool LexRawStringLiteral   (Token &Result, const char *CurPtr,
                                tok::TokenKind Kind);
-  void LexAngledStringLiteral(Token &Result, const char *CurPtr);
-  void LexCharConstant       (Token &Result, const char *CurPtr,
+  bool LexAngledStringLiteral(Token &Result, const char *CurPtr);
+  bool LexCharConstant       (Token &Result, const char *CurPtr,
                                tok::TokenKind Kind);
    bool LexEndOfFile          (Token &Result, const char *CurPtr);
-
-  bool SkipWhitespace        (Token &Result, const char *CurPtr);
-  bool SkipLineComment       (Token &Result, const char *CurPtr);
-  bool SkipBlockComment      (Token &Result, const char *CurPtr);
+  bool SkipWhitespace        (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
+  bool SkipLineComment       (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
+  bool SkipBlockComment      (Token &Result, const char *CurPtr,
+                              bool &TokAtPhysicalStartOfLine);
    bool SaveLineComment       (Token &Result, const char *CurPtr);
    
    bool IsStartOfConflictMarker(const char *CurPtr);
diff --git a/include/clang/Lex/PTHLexer.h b/include/clang/Lex/PTHLexer.h

index a9276e86b7260542bd89149847352a407677d942..d748bc1f7dffc1fe88a3dc910500b6c4b9ec499d 100644 (file)
--- a/include/clang/Lex/PTHLexer.h
+++ b/include/clang/Lex/PTHLexer.h
@@ -68,7 +68,7 @@ public:
    ~PTHLexer() {}
  
    /// Lex - Return the next token.
-  void Lex(Token &Tok);
+  bool Lex(Token &Tok);
  
    void getEOF(Token &Tok);
  
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h

index f197633c31ddf6f25639e27b43fe0ff12710d6b8..d1e16f152fd56386d05b5f5b850cd9902b278a44 100644 (file)
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -716,17 +716,8 @@ public:
    /// caching of tokens is on.
    bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
  
-  /// Lex - To lex a token from the preprocessor, just pull a token from the
-  /// current lexer or macro object.
-  void Lex(Token &Result) {
-    switch (CurLexerKind) {
-    case CLK_Lexer: CurLexer->Lex(Result); break;
-    case CLK_PTHLexer: CurPTHLexer->Lex(Result); break;
-    case CLK_TokenLexer: CurTokenLexer->Lex(Result); break;
-    case CLK_CachingLexer: CachingLex(Result); break;
-    case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); break;
-    }
-  }
+  /// Lex - Lex the next token for this preprocessor.
+  void Lex(Token &Result);
  
    void LexAfterModuleImport(Token &Result);
  
@@ -1168,7 +1159,10 @@ public:
    /// identifier and has filled in the tokens IdentifierInfo member.  This
    /// callback potentially macro expands it or turns it into a named token (like
    /// 'for').
-  void HandleIdentifier(Token &Identifier);
+  ///
+  /// \returns true if we actually computed a token, false if we need to
+  /// lex again.
+  bool HandleIdentifier(Token &Identifier);
  
  
    /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
@@ -1286,6 +1280,8 @@ private:
      IncludeMacroStack.pop_back();
    }
  
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
+
    /// \brief Allocate a new MacroInfo object.
    MacroInfo *AllocateMacroInfo();
  
@@ -1342,7 +1338,7 @@ private:
  
    /// HandleMacroExpandedIdentifier - If an identifier token is read that is to
    /// be expanded as a macro, handle it and return the next token as 'Tok'.  If
-  /// the macro should not be expanded return true, otherwise return false.
+  /// we lexed a token, return true; otherwise the caller should lex again.
    bool HandleMacroExpandedIdentifier(Token &Tok, MacroDirective *MD);
  
    /// \brief Cache macro expanded tokens for TokenLexers.
diff --git a/include/clang/Lex/Token.h b/include/clang/Lex/Token.h

index 92b0ac3bb4ca47a7bf389afa93756a8b4b4deab2..4f6391d6502d5a4f2877ccb29245c22feb92ae4f 100644 (file)
--- a/include/clang/Lex/Token.h
+++ b/include/clang/Lex/Token.h
@@ -71,8 +71,10 @@ public:
  
    // Various flags set per token:
    enum TokenFlags {
-    StartOfLine   = 0x01,  // At start of line or only after whitespace.
-    LeadingSpace  = 0x02,  // Whitespace exists before this token.
+    StartOfLine   = 0x01,  // At start of line or only after whitespace
+                           // (considering the line after macro expansion).
+    LeadingSpace  = 0x02,  // Whitespace exists before this token (considering 
+                           // whitespace after macro expansion).
      DisableExpand = 0x04,  // This identifier may never be macro expanded.
      NeedsCleaning = 0x08,  // Contained an escaped newline or trigraph.
      LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
diff --git a/include/clang/Lex/TokenLexer.h b/include/clang/Lex/TokenLexer.h

index 090402a811e231e8a28e2e6270c5e8a50520f2fa..7c8cfd028a28aa75a749a160221294ad32ea3263 100644 (file)
--- a/include/clang/Lex/TokenLexer.h
+++ b/include/clang/Lex/TokenLexer.h
@@ -136,7 +136,7 @@ public:
    unsigned isNextTokenLParen() const;
  
    /// Lex - Lex and return a token from this macro stream.
-  void Lex(Token &Tok);
+  bool Lex(Token &Tok);
  
    /// isParsingPreprocessorDirective - Return true if we are in the middle of a
    /// preprocessor directive.
@@ -181,6 +181,8 @@ private:
    /// macro definition.
    void updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
                                    Token *begin_tokens, Token *end_tokens);
+
+  void PropagateLineStartLeadingSpaceInfo(Token &Result);
  };
  
  }  // end namespace clang
diff --git a/lib/Frontend/VerifyDiagnosticConsumer.cpp b/lib/Frontend/VerifyDiagnosticConsumer.cpp

index f5555291a831fde0fde1bb27ad4989116f6f6abe..045e60add1fc97069b42849600ceb14efd297438 100644 (file)
--- a/lib/Frontend/VerifyDiagnosticConsumer.cpp
+++ b/lib/Frontend/VerifyDiagnosticConsumer.cpp
@@ -555,7 +555,7 @@ static bool findDirectives(SourceManager &SM, FileID FID,
    VerifyDiagnosticConsumer::DirectiveStatus Status =
      VerifyDiagnosticConsumer::HasNoDirectives;
    while (Tok.isNot(tok::eof)) {
-    RawLex.Lex(Tok);
+    RawLex.LexFromRawLexer(Tok);
      if (!Tok.is(tok::comment)) continue;
  
      std::string Comment = RawLex.getSpelling(Tok, SM, LangOpts);
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp

index 7d2869bb87cd60d5a986ad45b0ddd6ea09443362..2570755cbdefbb5e6d6c04c3d99068bc84a3b023 100644 (file)
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -93,6 +93,10 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
  
    // Start of the file is a start of line.
    IsAtStartOfLine = true;
+  IsAtPhysicalStartOfLine = true;
+
+  HasLeadingSpace = false;
+  HasLeadingEmptyMacro = false;
  
    // We are not after parsing a #.
    ParsingPreprocessorDirective = false;
@@ -1361,7 +1365,11 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
    BufferPtr += Bytes;
    if (BufferPtr > BufferEnd)
      BufferPtr = BufferEnd;
+  // FIXME: What exactly does the StartOfLine bit mean?  There are two
+  // possible meanings for the "start" of the line: the first token on the
+  // unexpanded line, or the first token on the expanded line.
    IsAtStartOfLine = StartOfLine;
+  IsAtPhysicalStartOfLine = StartOfLine;
  }
  
  static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts) {
@@ -1438,7 +1446,7 @@ static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C,
    }
   }
  
-void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
+bool Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
    // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
    unsigned Size;
    unsigned char C = *CurPtr++;
@@ -1462,7 +1470,7 @@ FinishIdentifier:
      // If we are in raw mode, return this identifier raw.  There is no need to
      // look up identifier information or attempt to macro expand it.
      if (LexingRawMode)
-      return;
+      return true;
  
      // Fill in Result.IdentifierInfo and update the token kind,
      // looking up the identifier in the identifier table.
@@ -1471,9 +1479,9 @@ FinishIdentifier:
      // Finally, now that we know we have an identifier, pass this off to the
      // preprocessor, which may macro expand it or something.
      if (II->isHandleIdentifierCase())
-      PP->HandleIdentifier(Result);
+      return PP->HandleIdentifier(Result);
      
-    return;
+    return true;
    }
  
    // Otherwise, $,\,? in identifier found.  Enter slower path.
@@ -1563,7 +1571,7 @@ bool Lexer::isHexaLiteral(const char *Start, const LangOptions &LangOpts) {
  /// LexNumericConstant - Lex the remainder of an integer or floating point
  /// constant. From[-1] is the first character lexed.  Return the end of the
  /// constant.
-void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
+bool Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
    unsigned Size;
    char C = getCharAndSize(CurPtr, Size);
    char PrevCh = 0;
@@ -1601,6 +1609,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
    Result.setLiteralData(TokStart);
+  return true;
  }
  
  /// LexUDSuffix - Lex the ud-suffix production for user-defined literal suffixes
@@ -1657,7 +1666,7 @@ const char *Lexer::LexUDSuffix(Token &Result, const char *CurPtr,
  
  /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
  /// either " or L" or u8" or u" or U".
-void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
+bool Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
                               tok::TokenKind Kind) {
    const char *NulCharacter = 0; // Does this string contain the \0 character?
  
@@ -1681,14 +1690,15 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
        if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
          Diag(BufferPtr, diag::ext_unterminated_string);
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
      }
      
      if (C == 0) {
        if (isCodeCompletionPoint(CurPtr-1)) {
          PP->CodeCompleteNaturalLanguage();
          FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-        return cutOffLexing();
+        cutOffLexing();
+        return true;
        }
  
        NulCharacter = CurPtr-1;
@@ -1708,11 +1718,12 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr,
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, Kind);
    Result.setLiteralData(TokStart);
+  return true;
  }
  
  /// LexRawStringLiteral - Lex the remainder of a raw string literal, after
  /// having lexed R", LR", u8R", uR", or UR".
-void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
+bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
                                  tok::TokenKind Kind) {
    // This function doesn't use getAndAdvanceChar because C++0x [lex.pptoken]p3:
    //  Between the initial and final double quote characters of the raw string,
@@ -1754,7 +1765,7 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
      }
  
      FormTokenWithChars(Result, CurPtr, tok::unknown);
-    return;
+    return true;
    }
  
    // Save prefix and move CurPtr past it
@@ -1775,7 +1786,7 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
          Diag(BufferPtr, diag::err_unterminated_raw_string)
            << StringRef(Prefix, PrefixLen);
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
      }
    }
  
@@ -1787,11 +1798,12 @@ void Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr,
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, Kind);
    Result.setLiteralData(TokStart);
+  return true;
  }
  
  /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
  /// after having lexed the '<' character.  This is used for #include filenames.
-void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
+bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
    const char *NulCharacter = 0; // Does this string contain the \0 character?
    const char *AfterLessPos = CurPtr;
    char C = getAndAdvanceChar(CurPtr, Result);
@@ -1806,7 +1818,7 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
        // If the filename is unterminated, then it must just be a lone <
        // character.  Return this as such.
        FormTokenWithChars(Result, AfterLessPos, tok::less);
-      return;
+      return true;
      } else if (C == 0) {
        NulCharacter = CurPtr-1;
      }
@@ -1821,12 +1833,13 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
    Result.setLiteralData(TokStart);
+  return true;
  }
  
  
  /// LexCharConstant - Lex the remainder of a character constant, after having
  /// lexed either ' or L' or u' or U'.
-void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
+bool Lexer::LexCharConstant(Token &Result, const char *CurPtr,
                              tok::TokenKind Kind) {
    const char *NulCharacter = 0; // Does this character contain the \0 character?
  
@@ -1841,7 +1854,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
      if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
        Diag(BufferPtr, diag::ext_empty_character);
      FormTokenWithChars(Result, CurPtr, tok::unknown);
-    return;
+    return true;
    }
  
    while (C != '\'') {
@@ -1854,14 +1867,15 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
        if (!isLexingRawMode() && !LangOpts.AsmPreprocessor)
          Diag(BufferPtr, diag::ext_unterminated_char);
        FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-      return;
+      return true;
      }
  
      if (C == 0) {
        if (isCodeCompletionPoint(CurPtr-1)) {
          PP->CodeCompleteNaturalLanguage();
          FormTokenWithChars(Result, CurPtr-1, tok::unknown);
-        return cutOffLexing();
+        cutOffLexing();
+        return true;
        }
  
        NulCharacter = CurPtr-1;
@@ -1881,6 +1895,7 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
    const char *TokStart = BufferPtr;
    FormTokenWithChars(Result, CurPtr, Kind);
    Result.setLiteralData(TokStart);
+  return true;
  }
  
  /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@@ -1888,7 +1903,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
  ///
  /// This method forms a token and returns true if KeepWhitespaceMode is enabled.
  ///
-bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
+bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr,
+                           bool &TokAtPhysicalStartOfLine) {
    // Whitespace - Skip it, then return the token after the whitespace.
    bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
  
@@ -1918,8 +1934,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
    // If the client wants us to return whitespace, return it now.
    if (isKeepWhitespaceMode()) {
      FormTokenWithChars(Result, CurPtr, tok::unknown);
-    if (SawNewline)
+    if (SawNewline) {
        IsAtStartOfLine = true;
+      IsAtPhysicalStartOfLine = true;
+    }
      // FIXME: The next token will not have LeadingSpace set.
      return true;
    }
@@ -1929,8 +1947,10 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
    bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
  
    Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
-  if (SawNewline)
+  if (SawNewline) {
      Result.setFlag(Token::StartOfLine);
+    TokAtPhysicalStartOfLine = true;
+  }
  
    BufferPtr = CurPtr;
    return false;
@@ -1942,7 +1962,8 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
  ///
  /// If we're in KeepCommentMode or any CommentHandler has inserted
  /// some tokens, this will store the first token and return true.
-bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
+bool Lexer::SkipLineComment(Token &Result, const char *CurPtr,
+                            bool &TokAtPhysicalStartOfLine) {
    // If Line comments aren't explicitly enabled for this language, emit an
    // extension warning.
    if (!LangOpts.LineComment && !isLexingRawMode()) {
@@ -2061,6 +2082,7 @@ bool Lexer::SkipLineComment(Token &Result, const char *CurPtr) {
  
    // The next returned token is at the start of the line.
    Result.setFlag(Token::StartOfLine);
+  TokAtPhysicalStartOfLine = true;
    // No leading whitespace seen so far.
    Result.clearFlag(Token::LeadingSpace);
    BufferPtr = CurPtr;
@@ -2171,7 +2193,8 @@ static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr,
  ///
  /// If we're in KeepCommentMode or any CommentHandler has inserted
  /// some tokens, this will store the first token and return true.
-bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
+bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr,
+                             bool &TokAtPhysicalStartOfLine) {
    // Scan one character past where we should, looking for a '/' character.  Once
    // we find it, check to see if it was preceded by a *.  This common
    // optimization helps people who like to put a lot of * characters in their
@@ -2322,7 +2345,7 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
    // efficiently now.  This is safe even in KeepWhitespaceMode because we would
    // have already returned above with the comment as a token.
    if (isHorizontalWhitespace(*CurPtr)) {
-    SkipWhitespace(Result, CurPtr+1);
+    SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
      return false;
    }
  
@@ -2472,14 +2495,19 @@ unsigned Lexer::isNextPPTokenLParen() {
    // Save state that can be changed while lexing so that we can restore it.
    const char *TmpBufferPtr = BufferPtr;
    bool inPPDirectiveMode = ParsingPreprocessorDirective;
+  bool atStartOfLine = IsAtStartOfLine;
+  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+  bool leadingSpace = HasLeadingSpace;
  
    Token Tok;
-  Tok.startToken();
-  LexTokenInternal(Tok);
+  Lex(Tok);
  
    // Restore state that may have changed.
    BufferPtr = TmpBufferPtr;
    ParsingPreprocessorDirective = inPPDirectiveMode;
+  HasLeadingSpace = leadingSpace;
+  IsAtStartOfLine = atStartOfLine;
+  IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
  
    // Restore the lexer back to non-skipping mode.
    LexingRawMode = false;
@@ -2712,7 +2740,8 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc,
    return CodePoint;
  }
  
-void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+bool Lexer::CheckUnicodeWhitespace(Token &Result, uint32_t C,
+                                   const char *CurPtr) {
    static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
        UnicodeWhitespaceCharRanges);
    if (!isLexingRawMode() && !PP->isPreprocessedOutput() &&
@@ -2721,12 +2750,12 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
        << makeCharRange(*this, BufferPtr, CurPtr);
  
      Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
-
-    return LexTokenInternal(Result);
+    return true;
    }
+  return false;
+}
  
+bool Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
    if (isAllowedIDChar(C, LangOpts) && isAllowedInitiallyIDChar(C, LangOpts)) {
      if (!isLexingRawMode() && !ParsingPreprocessorDirective &&
          !PP->isPreprocessedOutput()) {
@@ -2755,22 +2784,56 @@ void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
        << FixItHint::CreateRemoval(makeCharRange(*this, BufferPtr, CurPtr));
  
      BufferPtr = CurPtr;
-    return LexTokenInternal(Result);
+    return false;
    }
  
    // Otherwise, we have an explicit UCN or a character that's unlikely to show
    // up by accident.
    MIOpt.ReadToken();
    FormTokenWithChars(Result, CurPtr, tok::unknown);
+  return true;
+}
+
+void Lexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  IsAtStartOfLine = Result.isAtStartOfLine();
+  HasLeadingSpace = Result.hasLeadingSpace();
+  HasLeadingEmptyMacro = Result.hasLeadingEmptyMacro();
+  // Note that this doesn't affect IsAtPhysicalStartOfLine.
  }
  
+bool Lexer::Lex(Token &Result) {
+  // Start a new token.
+  Result.startToken();
+
+  // Set up misc whitespace flags for LexTokenInternal.
+  if (IsAtStartOfLine) {
+    Result.setFlag(Token::StartOfLine);
+    IsAtStartOfLine = false;
+  }
+
+  if (HasLeadingSpace) {
+    Result.setFlag(Token::LeadingSpace);
+    HasLeadingSpace = false;
+  }
+
+  if (HasLeadingEmptyMacro) {
+    Result.setFlag(Token::LeadingEmptyMacro);
+    HasLeadingEmptyMacro = false;
+  }
+
+  bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
+  IsAtPhysicalStartOfLine = false;
+  bool result = LexTokenInternal(Result, atPhysicalStartOfLine);
+  assert((result || !isLexingRawMode()) && "Raw lex must succeed");
+  return result;
+}
  
  /// LexTokenInternal - This implements a simple C family lexer.  It is an
  /// extremely performance critical piece of code.  This assumes that the buffer
  /// has a null character at the end of the file.  This returns a preprocessing
  /// token, not a normal token, as such, it is an internal interface.  It assumes
  /// that the Flags of result have been cleared before calling this.
-void Lexer::LexTokenInternal(Token &Result) {
+bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) {
  LexNextToken:
    // New token, can't need cleaning yet.
    Result.clearFlag(Token::NeedsCleaning);
@@ -2791,7 +2854,7 @@ LexNextToken:
      if (isKeepWhitespaceMode()) {
        FormTokenWithChars(Result, CurPtr, tok::unknown);
        // FIXME: The next token will not have LeadingSpace set.
-      return;
+      return true;
      }
  
      BufferPtr = CurPtr;
@@ -2807,43 +2870,32 @@ LexNextToken:
    switch (Char) {
    case 0:  // Null.
      // Found end of file?
-    if (CurPtr-1 == BufferEnd) {
-      // Read the PP instance variable into an automatic variable, because
-      // LexEndOfFile will often delete 'this'.
-      Preprocessor *PPCache = PP;
-      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
-        return;   // Got a token to return.
-      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
-      return PPCache->Lex(Result);
-    }
+    if (CurPtr-1 == BufferEnd)
+      return LexEndOfFile(Result, CurPtr-1);
  
      // Check if we are performing code completion.
      if (isCodeCompletionPoint(CurPtr-1)) {
        // Return the code-completion token.
        Result.startToken();
        FormTokenWithChars(Result, CurPtr, tok::code_completion);
-      return;
+      return true;
      }
  
      if (!isLexingRawMode())
        Diag(CurPtr-1, diag::null_in_file);
      Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
  
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    // We know the lexer hasn't changed, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
        
    case 26:  // DOS & CP/M EOF: "^Z".
      // If we're in Microsoft extensions mode, treat this as end of file.
-    if (LangOpts.MicrosoftExt) {
-      // Read the PP instance variable into an automatic variable, because
-      // LexEndOfFile will often delete 'this'.
-      Preprocessor *PPCache = PP;
-      if (LexEndOfFile(Result, CurPtr-1))  // Retreat back into the file.
-        return;   // Got a token to return.
-      assert(PPCache && "Raw buffer::LexEndOfFile should return a token");
-      return PPCache->Lex(Result);
-    }
+    if (LangOpts.MicrosoftExt)
+      return LexEndOfFile(Result, CurPtr-1);
+
      // If Microsoft extensions are disabled, this is just random garbage.
      Kind = tok::unknown;
      break;
@@ -2862,6 +2914,7 @@ LexNextToken:
  
        // Since we consumed a newline, we are back at the start of a line.
        IsAtStartOfLine = true;
+      IsAtPhysicalStartOfLine = true;
  
        Kind = tok::eod;
        break;
@@ -2870,17 +2923,20 @@ LexNextToken:
      // No leading whitespace seen so far.
      Result.clearFlag(Token::LeadingSpace);
  
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
+
+    // We only saw whitespace, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
    case ' ':
    case '\t':
    case '\f':
    case '\v':
    SkipHorizontalWhitespace:
      Result.setFlag(Token::LeadingSpace);
-    if (SkipWhitespace(Result, CurPtr))
-      return; // KeepWhitespaceMode
+    if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+      return true; // KeepWhitespaceMode
  
    SkipIgnoredUnits:
      CurPtr = BufferPtr;
@@ -2890,17 +2946,19 @@ LexNextToken:
      if (CurPtr[0] == '/' && CurPtr[1] == '/' && !inKeepCommentMode() &&
          LangOpts.LineComment &&
          (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
-      if (SkipLineComment(Result, CurPtr+2))
-        return; // There is a token to return.
+      if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
        goto SkipIgnoredUnits;
      } else if (CurPtr[0] == '/' && CurPtr[1] == '*' && !inKeepCommentMode()) {
-      if (SkipBlockComment(Result, CurPtr+2))
-        return; // There is a token to return.
+      if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
        goto SkipIgnoredUnits;
      } else if (isHorizontalWhitespace(*CurPtr)) {
        goto SkipHorizontalWhitespace;
      }
-    goto LexNextToken;   // GCC isn't tail call eliminating.
+    // We only saw whitespace, so just try again with this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
+    goto LexNextToken;
        
    // C99 6.4.4.1: Integer Constants.
    // C99 6.4.4.2: Floating Constants.
@@ -3193,8 +3251,9 @@ LexNextToken:
            TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) != '*';
  
        if (TreatAsComment) {
-        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
-          return; // There is a token to return.
+        if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                            TokAtPhysicalStartOfLine))
+          return true; // There is a token to return.
  
          // It is common for the tokens immediately after a // comment to be
          // whitespace (indentation for the next line).  Instead of going through
@@ -3204,9 +3263,13 @@ LexNextToken:
      }
  
      if (Char == '*') {  // /**/ comment.
-      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result)))
-        return; // There is a token to return.
-      goto LexNextToken;   // GCC isn't tail call eliminating.
+      if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
+                           TokAtPhysicalStartOfLine))
+        return true; // There is a token to return.
+
+      // We only saw whitespace, so just try again with this lexer.
+      // (We manually eliminate the tail call to avoid recursion.)
+      goto LexNextToken;
      }
  
      if (Char == '=') {
@@ -3241,7 +3304,7 @@ LexNextToken:
          // it's actually the start of a preprocessing directive.  Callback to
          // the preprocessor to handle it.
          // FIXME: -fpreprocessed mode??
-        if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+        if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
            goto HandleDirective;
  
          Kind = tok::hash;
@@ -3407,7 +3470,7 @@ LexNextToken:
        // it's actually the start of a preprocessing directive.  Callback to
        // the preprocessor to handle it.
        // FIXME: -fpreprocessed mode??
-      if (Result.isAtStartOfLine() && !LexingRawMode && !Is_PragmaLexer)
+      if (TokAtPhysicalStartOfLine && !LexingRawMode && !Is_PragmaLexer)
          goto HandleDirective;
  
        Kind = tok::hash;
@@ -3424,8 +3487,18 @@ LexNextToken:
  
    // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
    case '\\':
-    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result))
+    if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
+      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+          return true; // KeepWhitespaceMode
+
+        // We only saw whitespace, so just try again with this lexer.
+        // (We manually eliminate the tail call to avoid recursion.)
+        goto LexNextToken;
+      }
+
        return LexUnicode(Result, CodePoint, CurPtr);
+    }
  
      Kind = tok::unknown;
      break;
@@ -3446,8 +3519,17 @@ LexNextToken:
                                    (const UTF8 *)BufferEnd,
                                    &CodePoint,
                                    strictConversion);
-    if (Status == conversionOK)
+    if (Status == conversionOK) {
+      if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
+        if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
+          return true; // KeepWhitespaceMode
+
+        // We only saw whitespace, so just try again with this lexer.
+        // (We manually eliminate the tail call to avoid recursion.)
+        goto LexNextToken;
+      }
        return LexUnicode(Result, CodePoint, CurPtr);
+    }
      
      if (isLexingRawMode() || ParsingPreprocessorDirective ||
          PP->isPreprocessedOutput()) {
@@ -3462,6 +3544,9 @@ LexNextToken:
      Diag(CurPtr, diag::err_invalid_utf8);
  
      BufferPtr = CurPtr+1;
+    // We're pretending the character didn't exist, so just try again with
+    // this lexer.
+    // (We manually eliminate the tail call to avoid recursion.)
      goto LexNextToken;
    }
    }
@@ -3471,7 +3556,7 @@ LexNextToken:
  
    // Update the location of token as well as BufferPtr.
    FormTokenWithChars(Result, CurPtr, Kind);
-  return;
+  return true;
  
  HandleDirective:
    // We parsed a # character and it's the start of a preprocessing directive.
@@ -3482,21 +3567,9 @@ HandleDirective:
    if (PP->hadModuleLoaderFatalFailure()) {
      // With a fatal failure in the module loader, we abort parsing.
      assert(Result.is(tok::eof) && "Preprocessor did not set tok:eof");
-    return;
+    return true;
    }
  
-  // As an optimization, if the preprocessor didn't switch lexers, tail
-  // recurse.
-  if (PP->isCurrentLexer(this)) {
-    // Start a new token.  If this is a #include or something, the PP may
-    // want us starting at the beginning of the line again.  If so, set
-    // the StartOfLine flag and clear LeadingSpace.
-    if (IsAtStartOfLine) {
-      Result.setFlag(Token::StartOfLine);
-      Result.clearFlag(Token::LeadingSpace);
-      IsAtStartOfLine = false;
-    }
-    goto LexNextToken;   // GCC isn't tail call eliminating.
-  }
-  return PP->Lex(Result);
+  // We parsed the directive; lex a token with the new state.
+  return false;
  }
diff --git a/lib/Lex/PPLexerChange.cpp b/lib/Lex/PPLexerChange.cpp

index e521032bca3e948ac4752c51ceb7d33a89cf974d..1193c3ce4ecd3a7e05dc47dad09aa5b71aafc671 100644 (file)
--- a/lib/Lex/PPLexerChange.cpp
+++ b/lib/Lex/PPLexerChange.cpp
@@ -231,6 +231,19 @@ static void computeRelativePath(FileManager &FM, const DirectoryEntry *Dir,
    Result = File->getName();
  }
  
+void Preprocessor::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  if (CurTokenLexer) {
+    CurTokenLexer->PropagateLineStartLeadingSpaceInfo(Result);
+    return;
+  }
+  if (CurLexer) {
+    CurLexer->PropagateLineStartLeadingSpaceInfo(Result);
+    return;
+  }
+  // FIXME: Handle other kinds of lexers?  It generally shouldn't matter,
+  // but it might if they're empty?
+}
+
  /// HandleEndOfFile - This callback is invoked when the lexer hits the end of
  /// the current file.  This either returns the EOF token or pops a level off
  /// the include stack and keeps going.
@@ -320,6 +333,9 @@ bool Preprocessor::HandleEndOfFile(Token &Result, bool isEndOfMacro) {
      // We're done with the #included file.
      RemoveTopOfLexerStack();
  
+    // Propagate info about start-of-line/leading white-space/etc.
+    PropagateLineStartLeadingSpaceInfo(Result);
+
      // Notify the client, if desired, that we are in a new source file.
      if (Callbacks && !isEndOfMacro && CurPPLexer) {
        SrcMgr::CharacteristicKind FileType =
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp

index 8b81d5943b25cce3fed7ab042a46de8693b78ba6..865a89e8962fc5ac2d0b8528e3cf86b5b376d3b4 100644 (file)
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -225,7 +225,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
      if (Callbacks) Callbacks->MacroExpands(Identifier, MD,
                                             Identifier.getLocation(),/*Args=*/0);
      ExpandBuiltinMacro(Identifier);
-    return false;
+    return true;
    }
  
    /// Args - If this is a function-like macro expansion, this contains,
@@ -239,11 +239,6 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
  
    // If this is a function-like macro, read the arguments.
    if (MI->isFunctionLike()) {
-    // C99 6.10.3p10: If the preprocessing token immediately after the macro
-    // name isn't a '(', this macro should not be expanded.
-    if (!isNextPPTokenLParen())
-      return true;
-
      // Remember that we are now parsing the arguments to a macro invocation.
      // Preprocessor directives used inside macro arguments are not portable, and
      // this enables the warning.
@@ -254,7 +249,7 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
      InMacroArgs = false;
  
      // If there was an error parsing the arguments, bail out.
-    if (Args == 0) return false;
+    if (Args == 0) return true;
  
      ++NumFnMacroExpanded;
    } else {
@@ -314,25 +309,12 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
      // No need for arg info.
      if (Args) Args->destroy(*this);
  
-    // Ignore this macro use, just return the next token in the current
-    // buffer.
-    bool HadLeadingSpace = Identifier.hasLeadingSpace();
-    bool IsAtStartOfLine = Identifier.isAtStartOfLine();
-
-    Lex(Identifier);
-
-    // If the identifier isn't on some OTHER line, inherit the leading
-    // whitespace/first-on-a-line property of this token.  This handles
-    // stuff like "! XX," -> "! ," and "   XX," -> "    ,", when XX is
-    // empty.
-    if (!Identifier.isAtStartOfLine()) {
-      if (IsAtStartOfLine) Identifier.setFlag(Token::StartOfLine);
-      if (HadLeadingSpace) Identifier.setFlag(Token::LeadingSpace);
-    }
+    // Propagate whitespace info as if we had pushed, then popped,
+    // a macro context.
      Identifier.setFlag(Token::LeadingEmptyMacro);
+    PropagateLineStartLeadingSpaceInfo(Identifier);
      ++NumFastMacroExpanded;
      return false;
-
    } else if (MI->getNumTokens() == 1 &&
               isTrivialSingleTokenExpansion(MI, Identifier.getIdentifierInfo(),
                                             *this)) {
@@ -378,15 +360,11 @@ bool Preprocessor::HandleMacroExpandedIdentifier(Token &Identifier,
      // Since this is not an identifier token, it can't be macro expanded, so
      // we're done.
      ++NumFastMacroExpanded;
-    return false;
+    return true;
    }
  
    // Start expanding the macro.
    EnterMacro(Identifier, ExpansionEnd, MI, Args);
-
-  // Now that the macro is at the top of the include stack, ask the
-  // preprocessor to read the next token from it.
-  Lex(Identifier);
    return false;
  }
  
diff --git a/lib/Lex/PTHLexer.cpp b/lib/Lex/PTHLexer.cpp

index 32da92ad53b8ea21fe64d7c765db25a2f14ab4ec..e2629a3b2c498cbc894a313a5f1b284b35adce51 100644 (file)
--- a/lib/Lex/PTHLexer.cpp
+++ b/lib/Lex/PTHLexer.cpp
@@ -43,9 +43,7 @@ PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
    FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
  }
  
-void PTHLexer::Lex(Token& Tok) {
-LexNextToken:
-
+bool PTHLexer::Lex(Token& Tok) {
    //===--------------------------------------==//
    // Read the raw token data.
    //===--------------------------------------==//
@@ -90,8 +88,9 @@ LexNextToken:
      Tok.setKind(II->getTokenID());
  
      if (II->isHandleIdentifierCase())
-      PP->HandleIdentifier(Tok);
-    return;
+      return PP->HandleIdentifier(Tok);
+
+    return true;
    }
  
    //===--------------------------------------==//
@@ -101,16 +100,10 @@ LexNextToken:
      // Save the end-of-file token.
      EofToken = Tok;
  
-    // Save 'PP' to 'PPCache' as LexEndOfFile can delete 'this'.
-    Preprocessor *PPCache = PP;
-
      assert(!ParsingPreprocessorDirective);
      assert(!LexingRawMode);
-    
-    if (LexEndOfFile(Tok))
-      return;
  
-    return PPCache->Lex(Tok);
+    return LexEndOfFile(Tok);
    }
  
    if (TKind == tok::hash && Tok.isAtStartOfLine()) {
@@ -118,19 +111,17 @@ LexNextToken:
      assert(!LexingRawMode);
      PP->HandleDirective(Tok);
  
-    if (PP->isCurrentLexer(this))
-      goto LexNextToken;
-
-    return PP->Lex(Tok);
+    return false;
    }
  
    if (TKind == tok::eod) {
      assert(ParsingPreprocessorDirective);
      ParsingPreprocessorDirective = false;
-    return;
+    return true;
    }
  
    MIOpt.ReadToken();
+  return true;
  }
  
  bool PTHLexer::LexEndOfFile(Token &Result) {
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp

index 035f751d0d29962067985a37bced4ce809f5c230..81e6f364cfbe8c66684d663451921711f70aedf1 100644 (file)
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -614,7 +614,7 @@ void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
  /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
  /// IdentifierInfo methods that compute these properties will need to change to
  /// match.
-void Preprocessor::HandleIdentifier(Token &Identifier) {
+bool Preprocessor::HandleIdentifier(Token &Identifier) {
    assert(Identifier.getIdentifierInfo() &&
           "Can't handle identifiers without identifier info!");
  
@@ -648,8 +648,10 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
      MacroInfo *MI = MD->getMacroInfo();
      if (!DisableMacroExpansion) {
        if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
-        if (!HandleMacroExpandedIdentifier(Identifier, MD))
-          return;
+        // C99 6.10.3p10: If the preprocessing token immediately after the
+        // macro name isn't a '(', this macro should not be expanded.
+        if (!MI->isFunctionLike() || isNextPPTokenLParen())
+          return HandleMacroExpandedIdentifier(Identifier, MD);
        } else {
          // C99 6.10.3.4p2 says that a disabled macro may never again be
          // expanded, even if it's in a context where it could be expanded in the
@@ -698,8 +700,36 @@ void Preprocessor::HandleIdentifier(Token &Identifier) {
      ModuleImportExpectsIdentifier = true;
      CurLexerKind = CLK_LexAfterModuleImport;
    }
+  return true;
  }
  
+void Preprocessor::Lex(Token &Result) {
+  // We loop here until a lex function returns a token; this avoids recursion.
+  bool ReturnedToken;
+  do {
+    switch (CurLexerKind) {
+    case CLK_Lexer:
+      ReturnedToken = CurLexer->Lex(Result);
+      break;
+    case CLK_PTHLexer:
+      ReturnedToken = CurPTHLexer->Lex(Result);
+      break;
+    case CLK_TokenLexer:
+      ReturnedToken = CurTokenLexer->Lex(Result);
+      break;
+    case CLK_CachingLexer:
+      CachingLex(Result);
+      ReturnedToken = true;
+      break;
+    case CLK_LexAfterModuleImport:
+      LexAfterModuleImport(Result);
+      ReturnedToken = true;
+      break;
+    }
+  } while (!ReturnedToken);
+}
+
+
  /// \brief Lex a token following the 'import' contextual keyword.
  ///
  void Preprocessor::LexAfterModuleImport(Token &Result) {
diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp

index 5a59849720f67a8fc2088b5ef03ee9a3f40bc3f1..33ccbc0cfc9419195b821b311d00d54822e1e09f 100644 (file)
--- a/lib/Lex/PreprocessorLexer.cpp
+++ b/lib/Lex/PreprocessorLexer.cpp
@@ -38,7 +38,10 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
    ParsingFilename = true;
  
    // Lex the filename.
-  IndirectLex(FilenameTok);
+  if (LexingRawMode)
+    IndirectLex(FilenameTok);
+  else
+    PP->Lex(FilenameTok);
  
    // We should have obtained the filename now.
    ParsingFilename = false;
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp

index b83e76639d014a38f1f514fdca0cda133217415b..0213afcee92128c6d4991910fc8ba2415b9ea7ea 100644 (file)
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -416,22 +416,19 @@ void TokenLexer::ExpandFunctionArguments() {
  
  /// Lex - Lex and return a token from this macro stream.
  ///
-void TokenLexer::Lex(Token &Tok) {
+bool TokenLexer::Lex(Token &Tok) {
    // Lexing off the end of the macro, pop this macro off the expansion stack.
    if (isAtEnd()) {
      // If this is a macro (not a token stream), mark the macro enabled now
      // that it is no longer being expanded.
      if (Macro) Macro->EnableMacro();
  
-    // Pop this context off the preprocessors lexer stack and get the next
-    // token.  This will delete "this" so remember the PP instance var.
-    Preprocessor &PPCache = PP;
-    if (PP.HandleEndOfTokenLexer(Tok))
-      return;
-
-    // HandleEndOfTokenLexer may not return a token.  If it doesn't, lex
-    // whatever is next.
-    return PPCache.Lex(Tok);
+    Tok.startToken();
+    Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
+    Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+    if (CurToken == 0)
+      Tok.setFlag(Token::LeadingEmptyMacro);
+    return PP.HandleEndOfTokenLexer(Tok);
    }
  
    SourceManager &SM = PP.getSourceManager();
@@ -451,7 +448,7 @@ void TokenLexer::Lex(Token &Tok) {
      // When handling the microsoft /##/ extension, the final token is
      // returned by PasteTokens, not the pasted token.
      if (PasteTokens(Tok))
-      return;
+      return true;
  
      TokenIsFromPaste = true;
    }
@@ -482,6 +479,8 @@ void TokenLexer::Lex(Token &Tok) {
    if (isFirstToken) {
      Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
      Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+    AtStartOfLine = false;
+    HasLeadingSpace = false;
    }
  
    // Handle recursive expansion!
@@ -499,10 +498,11 @@ void TokenLexer::Lex(Token &Tok) {
      }
  
      if (!DisableMacroExpansion && II->isHandleIdentifierCase())
-      PP.HandleIdentifier(Tok);
+      return PP.HandleIdentifier(Tok);
    }
  
    // Otherwise, return a normal token.
+  return true;
  }
  
  /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
@@ -824,3 +824,8 @@ void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
      updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
    }
  }
+
+void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
+  AtStartOfLine = Result.isAtStartOfLine();
+  HasLeadingSpace = Result.hasLeadingSpace();
+}
diff --git a/test/Preprocessor/hash_line.c b/test/Preprocessor/hash_line.c

index 64edae0d95775aa5edbc2b06ec12109fa7a611c9..c4de9f04044e2b397d2645ad1e1e76282fc9c2fb 100644 (file)
--- a/test/Preprocessor/hash_line.c
+++ b/test/Preprocessor/hash_line.c
@@ -1,11 +1,12 @@
  // The 1 and # should not go on the same line.
  // RUN: %clang_cc1 -E %s | FileCheck --strict-whitespace %s
-// CHECK-NOT: 1{{.*}}#
  // CHECK: {{^1$}}
-// CHECK-NOT: 1{{.*}}#
-// CHECK: {{^      #$}}
-// CHECK-NOT: 1{{.*}}#
-1
+// CHECK-NEXT: {{^      #$}}
+// CHECK-NEXT: {{^2$}}
+// CHECK-NEXT: {{^           #$}}
  #define EMPTY
+#define IDENTITY(X) X
+1
  EMPTY #
-
+2
+IDENTITY() #
diff --git a/test/Preprocessor/macro_expand_empty.c b/test/Preprocessor/macro_expand_empty.c

new file mode 100644 (file)

index 0000000..3fb6394
--- /dev/null
+++ b/test/Preprocessor/macro_expand_empty.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only %s
+// Check that this doesn't crash
+
+#define IDENTITY1(x) x
+#define IDENTITY2(x) IDENTITY1(x) IDENTITY1(x) IDENTITY1(x) IDENTITY1(x)
+#define IDENTITY3(x) IDENTITY2(x) IDENTITY2(x) IDENTITY2(x) IDENTITY2(x)
+#define IDENTITY4(x) IDENTITY3(x) IDENTITY3(x) IDENTITY3(x) IDENTITY3(x)
+#define IDENTITY5(x) IDENTITY4(x) IDENTITY4(x) IDENTITY4(x) IDENTITY4(x)
+#define IDENTITY6(x) IDENTITY5(x) IDENTITY5(x) IDENTITY5(x) IDENTITY5(x)
+#define IDENTITY7(x) IDENTITY6(x) IDENTITY6(x) IDENTITY6(x) IDENTITY6(x)
+#define IDENTITY8(x) IDENTITY7(x) IDENTITY7(x) IDENTITY7(x) IDENTITY7(x)
+#define IDENTITY9(x) IDENTITY8(x) IDENTITY8(x) IDENTITY8(x) IDENTITY8(x)
+#define IDENTITY0(x) IDENTITY9(x) IDENTITY9(x) IDENTITY9(x) IDENTITY9(x)
+IDENTITY0()
diff --git a/test/SemaCXX/warn-empty-body.cpp b/test/SemaCXX/warn-empty-body.cpp

index d643cedf09cc5dae2b7799305c4521927ee67b07..d3aaac1d8c369640b9d627388085438d020b418d 100644 (file)
--- a/test/SemaCXX/warn-empty-body.cpp
+++ b/test/SemaCXX/warn-empty-body.cpp
@@ -269,3 +269,8 @@ void test_template_inst(int x) {
    test_template<double>(x);
  }
  
+#define IDENTITY(a) a
+void test7(int x, int y) {
+  if (x) IDENTITY(); // no-warning
+}
+
author	Eli Friedman <eli.friedman@gmail.com>
	Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)
committer	Eli Friedman <eli.friedman@gmail.com>
	Thu, 19 Sep 2013 00:41:32 +0000 (00:41 +0000)
include/clang/Lex/Lexer.h		patch \| blob \| history
include/clang/Lex/PTHLexer.h		patch \| blob \| history
include/clang/Lex/Preprocessor.h		patch \| blob \| history
include/clang/Lex/Token.h		patch \| blob \| history
include/clang/Lex/TokenLexer.h		patch \| blob \| history
lib/Frontend/VerifyDiagnosticConsumer.cpp		patch \| blob \| history
lib/Lex/Lexer.cpp		patch \| blob \| history
lib/Lex/PPLexerChange.cpp		patch \| blob \| history
lib/Lex/PPMacroExpansion.cpp		patch \| blob \| history
lib/Lex/PTHLexer.cpp		patch \| blob \| history
lib/Lex/Preprocessor.cpp		patch \| blob \| history
lib/Lex/PreprocessorLexer.cpp		patch \| blob \| history
lib/Lex/TokenLexer.cpp		patch \| blob \| history
test/Preprocessor/hash_line.c		patch \| blob \| history
test/Preprocessor/macro_expand_empty.c	[new file with mode: 0644]	patch \| blob
test/SemaCXX/warn-empty-body.cpp		patch \| blob \| history