Remove use of lookahead from _Pragma handling and from all other

author Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)

committer Richard Smith <richard-llvm@metafoo.co.uk>

Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)
author Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)
committer Richard Smith <richard-llvm@metafoo.co.uk>
Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)
diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h

index 6f7ab0b01fd216deaaba7481118ad0bbe3c047e0..6ab8c5d899d65a460d2438e997bc1265827426f9 100644 (file)
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@@ -323,6 +323,14 @@ class Preprocessor {
    /// to avoid hitting the same error over and over again.
    bool HasReachedMaxIncludeDepth = false;
  
+  /// The number of currently-active calls to Lex.
+  ///
+  /// Lex is reentrant, and asking for an (end-of-phase-4) token can often
+  /// require asking for multiple additional tokens. This counter makes it
+  /// possible for Lex to detect whether it's producing a token for the end
+  /// of phase 4 of translation or for some other situation.
+  unsigned LexLevel = 0;
+
  public:
    struct PreambleSkipInfo {
      SourceLocation HashTokenLoc;
@@ -1244,24 +1252,6 @@ public:
    /// Disable the last EnableBacktrackAtThisPos call.
    void CommitBacktrackedTokens();
  
-  struct CachedTokensRange {
-    CachedTokensTy::size_type Begin, End;
-  };
-
-private:
-  /// A range of cached tokens that should be erased after lexing
-  /// when backtracking requires the erasure of such cached tokens.
-  Optional<CachedTokensRange> CachedTokenRangeToErase;
-
-public:
-  /// Returns the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  CachedTokensRange LastCachedTokenRange();
-
-  /// Erase the range of cached tokens that were lexed since
-  /// EnableBacktrackAtThisPos() was previously called.
-  void EraseCachedTokens(CachedTokensRange TokenRange);
-
    /// Make Preprocessor re-lex the tokens that were lexed since
    /// EnableBacktrackAtThisPos() was previously called.
    void Backtrack();
@@ -1353,6 +1343,7 @@ public:
    /// tokens after phase 5.  As such, it is equivalent to using
    /// 'Lex', not 'LexUnexpandedToken'.
    const Token &LookAhead(unsigned N) {
+    assert(LexLevel == 0 && "cannot use lookahead while lexing");
      if (CachedLexPos + N < CachedTokens.size())
        return CachedTokens[CachedLexPos+N];
      else
@@ -1379,8 +1370,16 @@ public:
    /// If BackTrack() is called afterwards, the token will remain at the
    /// insertion point.
    void EnterToken(const Token &Tok) {
-    EnterCachingLexMode();
-    CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    if (LexLevel) {
+      // It's not correct in general to enter caching lex mode while in the
+      // middle of a nested lexing action.
+      auto TokCopy = llvm::make_unique<Token[]>(1);
+      TokCopy[0] = Tok;
+      EnterTokenStream(std::move(TokCopy), 1, true);
+    } else {
+      EnterCachingLexMode();
+      CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
+    }
    }
  
    /// We notify the Preprocessor that if it is caching tokens (because
@@ -2062,6 +2061,7 @@ private:
    }
  
    void EnterCachingLexMode();
+  void EnterCachingLexModeUnchecked();
  
    void ExitCachingLexMode() {
      if (InCachingLexMode())
diff --git a/lib/Lex/PPCaching.cpp b/lib/Lex/PPCaching.cpp

index 929d35ad51b6081fd32d8920b38812bbce99fdbd..90238de8a48fd738ed1050a80d0a65f9084091d4 100644 (file)
--- a/lib/Lex/PPCaching.cpp
+++ b/lib/Lex/PPCaching.cpp
@@ -23,6 +23,7 @@ using namespace clang;
  // be called multiple times and CommitBacktrackedTokens/Backtrack calls will
  // be combined with the EnableBacktrackAtThisPos calls in reverse order.
  void Preprocessor::EnableBacktrackAtThisPos() {
+  assert(LexLevel == 0 && "cannot use lookahead while lexing");
    BacktrackPositions.push_back(CachedLexPos);
    EnterCachingLexMode();
  }
@@ -34,29 +35,6 @@ void Preprocessor::CommitBacktrackedTokens() {
    BacktrackPositions.pop_back();
  }
  
-Preprocessor::CachedTokensRange Preprocessor::LastCachedTokenRange() {
-  assert(isBacktrackEnabled());
-  auto PrevCachedLexPos = BacktrackPositions.back();
-  return CachedTokensRange{PrevCachedLexPos, CachedLexPos};
-}
-
-void Preprocessor::EraseCachedTokens(CachedTokensRange TokenRange) {
-  assert(TokenRange.Begin <= TokenRange.End);
-  if (CachedLexPos == TokenRange.Begin && TokenRange.Begin != TokenRange.End) {
-    // We have backtracked to the start of the token range as we want to consume
-    // them again. Erase the tokens only after consuming then.
-    assert(!CachedTokenRangeToErase);
-    CachedTokenRangeToErase = TokenRange;
-    return;
-  }
-  // The cached tokens were committed, so they should be erased now.
-  assert(TokenRange.End == CachedLexPos);
-  CachedTokens.erase(CachedTokens.begin() + TokenRange.Begin,
-                     CachedTokens.begin() + TokenRange.End);
-  CachedLexPos = TokenRange.Begin;
-  ExitCachingLexMode();
-}
-
  // Make Preprocessor re-lex the tokens that were lexed since
  // EnableBacktrackAtThisPos() was previously called.
  void Preprocessor::Backtrack() {
@@ -71,15 +49,12 @@ void Preprocessor::CachingLex(Token &Result) {
    if (!InCachingLexMode())
      return;
  
+  // The assert in EnterCachingLexMode should prevent this from happening.
+  assert(LexLevel == 1 &&
+         "should not use token caching within the preprocessor");
+
    if (CachedLexPos < CachedTokens.size()) {
      Result = CachedTokens[CachedLexPos++];
-    // Erase the some of the cached tokens after they are consumed when
-    // asked to do so.
-    if (CachedTokenRangeToErase &&
-        CachedTokenRangeToErase->End == CachedLexPos) {
-      EraseCachedTokens(*CachedTokenRangeToErase);
-      CachedTokenRangeToErase = None;
-    }
      return;
    }
  
@@ -88,14 +63,14 @@ void Preprocessor::CachingLex(Token &Result) {
  
    if (isBacktrackEnabled()) {
      // Cache the lexed token.
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
      CachedTokens.push_back(Result);
      ++CachedLexPos;
      return;
    }
  
    if (CachedLexPos < CachedTokens.size()) {
-    EnterCachingLexMode();
+    EnterCachingLexModeUnchecked();
    } else {
      // All cached tokens were consumed.
      CachedTokens.clear();
@@ -104,11 +79,23 @@ void Preprocessor::CachingLex(Token &Result) {
  }
  
  void Preprocessor::EnterCachingLexMode() {
+  // The caching layer sits on top of all the other lexers, so it's incorrect
+  // to cache tokens while inside a nested lex action. The cached tokens would
+  // be retained after returning to the enclosing lex action and, at best,
+  // would appear at the wrong position in the token stream.
+  assert(LexLevel == 0 &&
+         "entered caching lex mode while lexing something else");
+
    if (InCachingLexMode()) {
      assert(CurLexerKind == CLK_CachingLexer && "Unexpected lexer kind");
      return;
    }
  
+  EnterCachingLexModeUnchecked();
+}
+
+void Preprocessor::EnterCachingLexModeUnchecked() {
+  assert(CurLexerKind != CLK_CachingLexer && "already in caching lex mode");
    PushIncludeMacroStack();
    CurLexerKind = CLK_CachingLexer;
  }
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp

index 3aae6ae245f08adb588dc67f188724925301cff0..216ae39632dcfb23c1ac44c0a004e75cf13334da 100644 (file)
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -829,10 +829,10 @@ void Preprocessor::HandleSkippedDirectiveWhileUsingPCH(Token &Result,
        return HandleIncludeDirective(HashLoc, Result);
      }
      if (SkippingUntilPragmaHdrStop && II->getPPKeywordID() == tok::pp_pragma) {
-      Token P = LookAhead(0);
-      auto *II = P.getIdentifierInfo();
+      Lex(Result);
+      auto *II = Result.getIdentifierInfo();
        if (II && II->getName() == "hdrstop")
-        return HandlePragmaDirective(HashLoc, PIK_HashPragma);
+        return HandlePragmaHdrstop(Result);
      }
    }
    DiscardUntilEndOfDirective();
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp

index 286b863b3547a181fec25733e181db29d2761a56..1c87f3ecc0900cb63abf982fcf72f3c8b74ef688 100644 (file)
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -144,84 +144,72 @@ void Preprocessor::HandlePragmaDirective(SourceLocation IntroducerLoc,
      DiscardUntilEndOfDirective();
  }
  
-namespace {
-
-/// Helper class for \see Preprocessor::Handle_Pragma.
-class LexingFor_PragmaRAII {
-  Preprocessor &PP;
-  bool InMacroArgPreExpansion;
-  bool Failed = false;
-  Token &OutTok;
-  Token PragmaTok;
-
-public:
-  LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion,
-                       Token &Tok)
-      : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) {
-    if (InMacroArgPreExpansion) {
-      PragmaTok = OutTok;
-      PP.EnableBacktrackAtThisPos();
-    }
-  }
-
-  ~LexingFor_PragmaRAII() {
-    if (InMacroArgPreExpansion) {
-      // When committing/backtracking the cached pragma tokens in a macro
-      // argument pre-expansion we want to ensure that either the tokens which
-      // have been committed will be removed from the cache or that the tokens
-      // over which we just backtracked won't remain in the cache after they're
-      // consumed and that the caching will stop after consuming them.
-      // Otherwise the caching will interfere with the way macro expansion
-      // works, because we will continue to cache tokens after consuming the
-      // backtracked tokens, which shouldn't happen when we're dealing with
-      // macro argument pre-expansion.
-      auto CachedTokenRange = PP.LastCachedTokenRange();
-      if (Failed) {
-        PP.CommitBacktrackedTokens();
-      } else {
-        PP.Backtrack();
-        OutTok = PragmaTok;
-      }
-      PP.EraseCachedTokens(CachedTokenRange);
-    }
-  }
-
-  void failed() {
-    Failed = true;
-  }
-};
-
-} // namespace
-
  /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
  /// return the first token after the directive.  The _Pragma token has just
  /// been read into 'Tok'.
  void Preprocessor::Handle_Pragma(Token &Tok) {
-  // This works differently if we are pre-expanding a macro argument.
-  // In that case we don't actually "activate" the pragma now, we only lex it
-  // until we are sure it is lexically correct and then we backtrack so that
-  // we activate the pragma whenever we encounter the tokens again in the token
-  // stream. This ensures that we will activate it in the correct location
-  // or that we will ignore it if it never enters the token stream, e.g:
+  // C11 6.10.3.4/3:
+  //   all pragma unary operator expressions within [a completely
+  //   macro-replaced preprocessing token sequence] are [...] processed [after
+  //   rescanning is complete]
    //
-  //     #define EMPTY(x)
-  //     #define INACTIVE(x) EMPTY(x)
-  //     INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))
+  // This means that we execute _Pragma operators in two cases:
+  //
+  //  1) on token sequences that would otherwise be produced as the output of
+  //     phase 4 of preprocessing, and
+  //  2) on token sequences formed as the macro-replaced token sequence of a
+  //     macro argument
+  //
+  // Case #2 appears to be a wording bug: only _Pragmas that would survive to
+  // the end of phase 4 should actually be executed. Discussion on the WG14
+  // mailing list suggests that a _Pragma operator is notionally checked early,
+  // but only pragmas that survive to the end of phase 4 should be executed.
+  //
+  // In Case #2, we check the syntax now, but then put the tokens back into the
+  // token stream for later consumption.
+
+  struct TokenCollector {
+    Preprocessor &Self;
+    bool Collect;
+    SmallVector<Token, 3> Tokens;
+    Token &Tok;
+
+    void lex() {
+      if (Collect)
+        Tokens.push_back(Tok);
+      Self.Lex(Tok);
+    }
+
+    void revert() {
+      assert(Collect && "did not collect tokens");
+      assert(!Tokens.empty() && "collected unexpected number of tokens");
+
+      // Push the ( "string" ) tokens into the token stream.
+      auto Toks = llvm::make_unique<Token[]>(Tokens.size());
+      std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get());
+      Toks[Tokens.size() - 1] = Tok;
+      Self.EnterTokenStream(std::move(Toks), Tokens.size(),
+                            /*DisableMacroExpansion*/ true);
+
+      // ... and return the _Pragma token unchanged.
+      Tok = *Tokens.begin();
+    }
+  };
  
-  LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok);
+  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
  
    // Remember the pragma token location.
    SourceLocation PragmaLoc = Tok.getLocation();
  
    // Read the '('.
-  Lex(Tok);
+  Toks.lex();
    if (Tok.isNot(tok::l_paren)) {
      Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
    }
  
    // Read the '"..."'.
-  Lex(Tok);
+  Toks.lex();
    if (!tok::isStringLiteral(Tok.getKind())) {
      Diag(PragmaLoc, diag::err__Pragma_malformed);
      // Skip bad tokens, and the ')', if present.
@@ -233,7 +221,7 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
        Lex(Tok);
      if (Tok.is(tok::r_paren))
        Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
    }
  
    if (Tok.hasUDSuffix()) {
@@ -242,21 +230,24 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
      Lex(Tok);
      if (Tok.is(tok::r_paren))
        Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
    }
  
    // Remember the string.
    Token StrTok = Tok;
  
    // Read the ')'.
-  Lex(Tok);
+  Toks.lex();
    if (Tok.isNot(tok::r_paren)) {
      Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
    }
  
-  if (InMacroArgPreExpansion)
+  // If we're expanding a macro argument, put the tokens back.
+  if (InMacroArgPreExpansion) {
+    Toks.revert();
      return;
+  }
  
    SourceLocation RParenLoc = Tok.getLocation();
    std::string StrVal = getSpelling(StrTok);
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp

index ff040a28d4b6affa3c913bae774d844f33a36fc6..d1c02c942dab1db78d899ab44f0ac82cac112430 100644 (file)
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -862,6 +862,8 @@ bool Preprocessor::HandleIdentifier(Token &Identifier) {
  }
  
  void Preprocessor::Lex(Token &Result) {
+  ++LexLevel;
+
    // We loop here until a lex function returns a token; this avoids recursion.
    bool ReturnedToken;
    do {
@@ -893,6 +895,7 @@ void Preprocessor::Lex(Token &Result) {
    }
  
    LastTokenWasAt = Result.is(tok::at);
+  --LexLevel;
  }
  
  /// Lex a header-name token (including one formed from header-name-tokens if
diff --git a/test/Preprocessor/_Pragma-in-macro-arg.cpp b/test/Preprocessor/_Pragma-in-macro-arg.cpp

new file mode 100644 (file)

index 0000000..0d2dcd0
--- /dev/null
+++ b/test/Preprocessor/_Pragma-in-macro-arg.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -verify -Wconversion
+
+#define P(X) _Pragma(#X)
+#define V(X) X
+
+#define X \
+  P(clang diagnostic push) \
+  P(clang diagnostic ignored "-Wconversion") \
+  ) = 1.2; \
+  P(clang diagnostic pop)
+
+void f() {
+  int a = 1.2; // expected-warning {{changes value}}
+
+  // Note, we intentionally enter a tentatively-parsed context here to trigger
+  // regular use of lookahead. This would go wrong if _Pragma checking in macro
+  // argument pre-expansion also tries to use token lookahead.
+  int (b
+  V(X)
+
+  int c = 1.2; // expected-warning {{changes value}}
+}
author	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)
committer	Richard Smith <richard-llvm@metafoo.co.uk>
	Thu, 11 Apr 2019 21:18:22 +0000 (21:18 +0000)
include/clang/Lex/Preprocessor.h		patch \| blob \| history
lib/Lex/PPCaching.cpp		patch \| blob \| history
lib/Lex/PPDirectives.cpp		patch \| blob \| history
lib/Lex/Pragma.cpp		patch \| blob \| history
lib/Lex/Preprocessor.cpp		patch \| blob \| history
test/Preprocessor/_Pragma-in-macro-arg.cpp	[new file with mode: 0644]	patch \| blob