Preprocessor: preserve whitespace in -traditional-cpp mode.

author Jordan Rose <jordan_rose@apple.com>

Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)

committer Jordan Rose <jordan_rose@apple.com>

Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)
author Jordan Rose <jordan_rose@apple.com>
Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)
committer Jordan Rose <jordan_rose@apple.com>
Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h

index 535baf588f8400b15bd8b532b5bc4a8e8e770ee6..57e6c9200a5778120250e482c485b177ae3fdf03 100644 (file)
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -174,8 +174,8 @@ public:
    /// SetKeepWhitespaceMode - This method lets clients enable or disable
    /// whitespace retention mode.
    void SetKeepWhitespaceMode(bool Val) {
-    assert((!Val || LexingRawMode) &&
-           "Can only enable whitespace retention in raw mode");
+    assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
+           "Can only retain whitespace in raw mode or -traditional-cpp");
      ExtendedTokenMode = Val ? 2 : 0;
    }
  
@@ -194,6 +194,14 @@ public:
      ExtendedTokenMode = Mode ? 1 : 0;
    }
  
+  /// Sets the extended token mode back to its initial value, according to the
+  /// language options and preprocessor. This controls whether the lexer
+  /// produces comment and whitespace tokens.
+  ///
+  /// This requires the lexer to have an associated preprocessor. A standalone
+  /// lexer has nothing to reset to.
+  void resetExtendedTokenMode();
+
    const char *getBufferStart() const { return BufferStart; }
  
    /// ReadToEndOfLine - Read the rest of the current preprocessor line as an
diff --git a/lib/Frontend/PrintPreprocessedOutput.cpp b/lib/Frontend/PrintPreprocessedOutput.cpp

index c85945b8941a869405afd69a0aa238390a5fabda..3d55adceff92c221213f21a35374ec9b1f3a9854 100644 (file)
--- a/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -548,7 +548,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
  
        // Tokens that can contain embedded newlines need to adjust our current
        // line number.
-      if (Tok.getKind() == tok::comment)
+      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
          Callbacks->HandleNewlinesInToken(TokPtr, Len);
      } else {
        std::string S = PP.getSpelling(Tok);
@@ -556,7 +556,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
  
        // Tokens that can contain embedded newlines need to adjust our current
        // line number.
-      if (Tok.getKind() == tok::comment)
+      if (Tok.getKind() == tok::comment || Tok.getKind() == tok::unknown)
          Callbacks->HandleNewlinesInToken(&S[0], S.size());
      }
      Callbacks->setEmittedTokensOnThisLine();
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp

index 0590d9e114a7dc8862477785c1d34b3e3e662688..65ea5e3996442ee754354c24f060dd70ec587779 100644 (file)
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -122,8 +122,15 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
    InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
              InputFile->getBufferEnd());
  
-  // Default to keeping comments if the preprocessor wants them.
-  SetCommentRetentionState(PP.getCommentRetentionState());
+  resetExtendedTokenMode();
+}
+
+void Lexer::resetExtendedTokenMode() {
+  assert(PP && "Cannot reset token mode without a preprocessor");
+  if (LangOpts.TraditionalCPP)
+    SetKeepWhitespaceMode(true);
+  else
+    SetCommentRetentionState(PP->getCommentRetentionState());
  }
  
  /// Lexer constructor - Create a new raw lexer object.  This object is only
@@ -1844,6 +1851,8 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr,
  ///
  bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
    // Whitespace - Skip it, then return the token after the whitespace.
+  bool SawNewline = isVerticalWhitespace(CurPtr[-1]);
+
    unsigned char Char = *CurPtr;  // Skip consequtive spaces efficiently.
    while (1) {
      // Skip horizontal whitespace very aggressively.
@@ -1851,7 +1860,7 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
        Char = *++CurPtr;
  
      // Otherwise if we have something other than whitespace, we're done.
-    if (Char != '\n' && Char != '\r')
+    if (!isVerticalWhitespace(Char))
        break;
  
      if (ParsingPreprocessorDirective) {
@@ -1861,24 +1870,27 @@ bool Lexer::SkipWhitespace(Token &Result, const char *CurPtr) {
      }
  
      // ok, but handle newline.
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
-    // No leading whitespace seen so far.
-    Result.clearFlag(Token::LeadingSpace);
+    SawNewline = true;
      Char = *++CurPtr;
    }
  
-  // If this isn't immediately after a newline, there is leading space.
-  char PrevChar = CurPtr[-1];
-  if (PrevChar != '\n' && PrevChar != '\r')
-    Result.setFlag(Token::LeadingSpace);
-
    // If the client wants us to return whitespace, return it now.
    if (isKeepWhitespaceMode()) {
      FormTokenWithChars(Result, CurPtr, tok::unknown);
+    if (SawNewline)
+      IsAtStartOfLine = true;
+    // FIXME: The next token will not have LeadingSpace set.
      return true;
    }
  
+  // If this isn't immediately after a newline, there is leading space.
+  char PrevChar = CurPtr[-1];
+  bool HasLeadingSpace = !isVerticalWhitespace(PrevChar);
+
+  Result.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
+  if (SawNewline)
+    Result.setFlag(Token::StartOfLine);
+
    BufferPtr = CurPtr;
    return false;
  }
@@ -2269,7 +2281,6 @@ bool Lexer::SkipBlockComment(Token &Result, const char *CurPtr) {
    // efficiently now.  This is safe even in KeepWhitespaceMode because we would
    // have already returned above with the comment as a token.
    if (isHorizontalWhitespace(*CurPtr)) {
-    Result.setFlag(Token::LeadingSpace);
      SkipWhitespace(Result, CurPtr+1);
      return false;
    }
@@ -2351,7 +2362,7 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
      FormTokenWithChars(Result, CurPtr, tok::eod);
  
      // Restore comment saving mode, in case it was disabled for directive.
-    SetCommentRetentionState(PP->getCommentRetentionState());
+    resetExtendedTokenMode();
      return true;  // Have a token.
    }
   
@@ -2718,6 +2729,7 @@ LexNextToken:
      // whitespace.
      if (isKeepWhitespaceMode()) {
        FormTokenWithChars(Result, CurPtr, tok::unknown);
+      // FIXME: The next token will not have LeadingSpace set.
        return;
      }
  
@@ -2785,7 +2797,7 @@ LexNextToken:
  
        // Restore comment saving mode, in case it was disabled for directive.
        if (PP)
-        SetCommentRetentionState(PP->getCommentRetentionState());
+        resetExtendedTokenMode();
  
        // Since we consumed a newline, we are back at the start of a line.
        IsAtStartOfLine = true;
@@ -2793,8 +2805,7 @@ LexNextToken:
        Kind = tok::eod;
        break;
      }
-    // The returned token is at the start of the line.
-    Result.setFlag(Token::StartOfLine);
+
      // No leading whitespace seen so far.
      Result.clearFlag(Token::LeadingSpace);
  
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp

index 18250281c1db2561f0d2b1854655e006ed290ac4..54457c36cfde0a303214ccf1a8450780df66a88b 100644 (file)
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -269,7 +269,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
      if (Tok.isNot(tok::raw_identifier)) {
        CurPPLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
        continue;
      }
  
@@ -285,7 +285,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
          FirstChar != 'i' && FirstChar != 'e') {
        CurPPLexer->ParsingPreprocessorDirective = false;
        // Restore comment saving mode.
-      if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+      if (CurLexer) CurLexer->resetExtendedTokenMode();
        continue;
      }
  
@@ -302,7 +302,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
        if (IdLen >= 20) {
          CurPPLexer->ParsingPreprocessorDirective = false;
          // Restore comment saving mode.
-        if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+        if (CurLexer) CurLexer->resetExtendedTokenMode();
          continue;
        }
        memcpy(DirectiveBuf, &DirectiveStr[0], IdLen);
@@ -408,7 +408,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
  
      CurPPLexer->ParsingPreprocessorDirective = false;
      // Restore comment saving mode.
-    if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
+    if (CurLexer) CurLexer->resetExtendedTokenMode();
    }
  
    // Finally, if we are out of the conditional (saw an #endif or ran off the end
@@ -594,6 +594,7 @@ void Preprocessor::HandleDirective(Token &Result) {
    // mode.  Tell the lexer this so any newlines we see will be converted into an
    // EOD token (which terminates the directive).
    CurPPLexer->ParsingPreprocessorDirective = true;
+  if (CurLexer) CurLexer->SetKeepWhitespaceMode(false);
  
    ++NumDirectives;
  
@@ -638,14 +639,9 @@ void Preprocessor::HandleDirective(Token &Result) {
    // and reset to previous state when returning from this function.
    ResetMacroExpansionHelper helper(this);
  
-TryAgain:
    switch (Result.getKind()) {
    case tok::eod:
      return;   // null directive.
-  case tok::comment:
-    // Handle stuff like "# /*foo*/ define X" in -E -C mode.
-    LexUnexpandedToken(Result);
-    goto TryAgain;
    case tok::code_completion:
      if (CodeComplete)
        CodeComplete->CodeCompleteDirective(
diff --git a/test/Preprocessor/traditional-cpp.c b/test/Preprocessor/traditional-cpp.c

index 5fc9ee398ecf17405ff57e43b0fa73ec048ceea2..72024546ff3d6af83a54260d5280322946783ebb 100644 (file)
--- a/test/Preprocessor/traditional-cpp.c
+++ b/test/Preprocessor/traditional-cpp.c
@@ -4,9 +4,61 @@
  
  /*
   RUN: %clang_cc1 -traditional-cpp %s -E -o %t
- RUN: FileCheck < %t %s
+ RUN: FileCheck -strict-whitespace < %t %s
  */
  
-/* CHECK: foo // bar
+/* CHECK: {{^}}foo // bar{{$}}
   */
  foo // bar
+
+
+/* The lines in this file contain hard tab characters and trailing whitespace; 
+ * do not change them! */
+
+/* CHECK: {{^}}        indented!{{$}}
+ * CHECK: {{^}}tab     separated       values{{$}}
+ */
+       indented!
+tab    separated       values
+
+#define bracket(x) >>>x<<<
+bracket(|  spaces  |)
+/* CHECK: {{^}}>>>|  spaces  |<<<{{$}}
+ */
+
+/* This is still a preprocessing directive. */
+# define foo bar
+foo!
+-
+       foo!    foo!    
+/* CHECK: {{^}}bar!{{$}}
+ * CHECK: {{^}}        bar!    bar!    {{$}}
+ */
+
+/* Deliberately check a leading newline with spaces on that line. */
+   
+# define foo bar
+foo!
+-
+       foo!    foo!    
+/* CHECK: {{^}}bar!{{$}}
+ * CHECK: {{^}}        bar!    bar!    {{$}}
+ */
+
+/* FIXME: -traditional-cpp should not consider this a preprocessing directive
+ * because the # isn't in the first column.
+ */
+ #define foo2 bar
+foo2!
+/* If this were working, both of these checks would be on.
+ * CHECK-NOT: {{^}} #define foo2 bar{{$}}
+ * CHECK-NOT: {{^}}foo2!{{$}}
+ */
+
+/* FIXME: -traditional-cpp should not homogenize whitespace in macros.
+ */
+#define bracket2(x) >>>  x  <<<
+bracket2(spaces)
+/* If this were working, this check would be on.
+ * CHECK-NOT: {{^}}>>>  spaces  <<<{{$}}
+ */
author	Jordan Rose <jordan_rose@apple.com>
	Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)
committer	Jordan Rose <jordan_rose@apple.com>
	Thu, 21 Feb 2013 18:53:19 +0000 (18:53 +0000)
include/clang/Lex/Lexer.h		patch | blob | history
lib/Frontend/PrintPreprocessedOutput.cpp		patch | blob | history
lib/Lex/Lexer.cpp		patch | blob | history
lib/Lex/PPDirectives.cpp		patch | blob | history
test/Preprocessor/traditional-cpp.c		patch | blob | history