From: Chris Lattner Date: Thu, 7 Feb 2008 06:03:59 +0000 (+0000) Subject: Implement support for the extremely atrocious MS /##/ extension, X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3f1cc838f9caf469990f23fccd0940263c0c61ff;p=clang Implement support for the extremely atrocious MS /##/ extension, which pastes together a comment. This is only enabled with -fms-extensions of course. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@46845 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/Lex/MacroExpander.cpp b/Lex/MacroExpander.cpp index 9d4b07216a..4e62c947e3 100644 --- a/Lex/MacroExpander.cpp +++ b/Lex/MacroExpander.cpp @@ -508,7 +508,11 @@ void MacroExpander::Lex(Token &Tok) { // If this token is followed by a token paste (##) operator, paste the tokens! if (!isAtEnd() && MacroTokens[CurToken].is(tok::hashhash)) - PasteTokens(Tok); + if (PasteTokens(Tok)) { + // When handling the Microsoft /##/ extension, the final token is + // returned by PasteTokens, not the pasted token. + return; + } // The token's current location indicate where the token was lexed from. We // need this information to compute the spelling of the token, but any @@ -538,7 +542,8 @@ void MacroExpander::Lex(Token &Tok) { /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there /// are is another ## after it, chomp it iteratively. Return the result as Tok. -void MacroExpander::PasteTokens(Token &Tok) { +/// If this returns true, the caller should immediately return the token. +bool MacroExpander::PasteTokens(Token &Tok) { llvm::SmallVector Buffer; do { // Consume the ## operator. @@ -621,10 +626,18 @@ void MacroExpander::PasteTokens(Token &Tok) { // This occurs with "x ## +" and other stuff. Return with Tok unmodified // and with RHS as the next token to lex. if (isInvalid) { - // If not in assembler language mode. 
- PP.Diag(PasteOpLoc, diag::err_pp_bad_paste, - std::string(Buffer.begin(), Buffer.end()-1)); - return; + // Test for the Microsoft extension of /##/ turning into // here on the + // error path. + if (PP.getLangOptions().Microsoft && Tok.is(tok::slash) && + RHS.is(tok::slash)) { + HandleMicrosoftCommentPaste(Tok); + return true; + } else { + // TODO: If not in assembler language mode. + PP.Diag(PasteOpLoc, diag::err_pp_bad_paste, + std::string(Buffer.begin(), Buffer.end()-1)); + return false; + } } // Turn ## into 'other' to avoid # ## # from looking like a paste operator. @@ -649,6 +662,7 @@ void MacroExpander::PasteTokens(Token &Tok) { // by saying we're skipping contents, so we need to do this manually. Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); } + return false; } /// isNextTokenLParen - If the next token lexed will pop this macro off the @@ -660,3 +674,21 @@ unsigned MacroExpander::isNextTokenLParen() const { return 2; return MacroTokens[CurToken].is(tok::l_paren); } + + +/// HandleMicrosoftCommentPaste - In Microsoft compatibility mode, /##/ pastes +/// together to form a comment that comments out everything in the current +/// macro, other active macros, and anything left on the current physical +/// source line of the instantiated buffer. Handle this by returning the +/// first token on the next line. +void MacroExpander::HandleMicrosoftCommentPaste(Token &Tok) { + // We 'comment out' the rest of this macro by just ignoring the rest of the + // tokens that have not been lexed yet, if any. + + // Since this must be a macro, mark the macro enabled now that it is no longer + // being expanded. 
+ assert(Macro && "Token streams can't paste comments"); + Macro->EnableMacro(); + + PP.HandleMicrosoftCommentPaste(Tok); +} diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp index 40fd322550..f4e737413a 100644 --- a/Lex/Preprocessor.cpp +++ b/Lex/Preprocessor.cpp @@ -1358,6 +1358,71 @@ bool Preprocessor::HandleEndOfMacro(Token &Result) { return HandleEndOfFile(Result, true); } +/// HandleMicrosoftCommentPaste - When the macro expander pastes together a +/// comment (/##/) in Microsoft mode, this method handles updating the current +/// state, returning the token on the next source line. +void Preprocessor::HandleMicrosoftCommentPaste(Token &Tok) { + assert(CurMacroExpander && !CurLexer && + "Pasted comment can only be formed from macro"); + + // We handle this by scanning for the closest real lexer, switching it to + // raw mode and preprocessor mode. This will cause it to return \n as an + // explicit EOM token. + Lexer *FoundLexer = 0; + bool LexerWasInPPMode = false; + for (unsigned i = 0, e = IncludeMacroStack.size(); i != e; ++i) { + IncludeStackInfo &ISI = *(IncludeMacroStack.end()-i-1); + if (ISI.TheLexer == 0) continue; // Scan for a real lexer. + + // Once we find a real lexer, mark it as raw mode (disabling macro + // expansions) and preprocessor mode (return EOM). We know that the lexer + // was *not* in raw mode before, because the macro that the comment came + // from was expanded. However, it could have already been in preprocessor + // mode (#if COMMENT) in which case we have to return it to that mode and + // return EOM. + FoundLexer = ISI.TheLexer; + FoundLexer->LexingRawMode = true; + LexerWasInPPMode = FoundLexer->ParsingPreprocessorDirective; + FoundLexer->ParsingPreprocessorDirective = true; + break; + } + + // Okay, we either found and switched over the lexer, or we didn't find a + // lexer. In either case, finish off the macro the comment came from, getting + // the next token. 
+ if (!HandleEndOfMacro(Tok)) Lex(Tok); + + // Discard tokens as long as we don't have EOF or EOM. This 'comments + // out' the rest of the line, including any tokens that came from other macros + // that were active, as in: + // #define submacro a COMMENT b + // submacro c + // which should lex to 'a' only: 'b' and 'c' should be removed. + while (Tok.isNot(tok::eom) && Tok.isNot(tok::eof)) + Lex(Tok); + + // If we got an eom token, then we successfully found the end of the line. + if (Tok.is(tok::eom)) { + assert(FoundLexer && "Can't get end of line without an active lexer"); + // Restore the lexer back to normal mode instead of raw mode. + FoundLexer->LexingRawMode = false; + + // If the lexer was already in preprocessor mode, just return the EOM token + // to finish the preprocessor line. + if (LexerWasInPPMode) return; + + // Otherwise, switch out of PP mode and return the next lexed token. + FoundLexer->ParsingPreprocessorDirective = false; + return Lex(Tok); + } + + // If we got an EOF token, then we reached the end of the token stream but + // didn't find an explicit \n. This can only happen if there was no lexer + // active (an active lexer would return EOM at EOF if there was no \n in + // preprocessor directive mode), so just return EOF as our token. + assert(!FoundLexer && "Lexer should return EOM before EOF in PP mode"); + return; +} //===----------------------------------------------------------------------===// // Utility Methods for Preprocessor Directive Handling. diff --git a/include/clang/Lex/MacroExpander.h b/include/clang/Lex/MacroExpander.h index edca2e54a8..6d9211167f 100644 --- a/include/clang/Lex/MacroExpander.h +++ b/include/clang/Lex/MacroExpander.h @@ -189,12 +189,20 @@ private: /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ## /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there /// are is another ## after it, chomp it iteratively. Return the result as - /// Tok. 
- void PasteTokens(Token &Tok); + /// Tok. If this returns true, the caller should immediately return the + /// token. + bool PasteTokens(Token &Tok); /// Expand the arguments of a function-like macro so that we can quickly /// return preexpanded tokens from MacroTokens. void ExpandFunctionArguments(); + + /// HandleMicrosoftCommentPaste - In Microsoft compatibility mode, /##/ pastes + /// together to form a comment that comments out everything in the current + /// macro, other active macros, and anything left on the current physical + /// source line of the instantiated buffer. Handle this by returning the + /// first token on the next line. + void HandleMicrosoftCommentPaste(Token &Tok); }; } // end namespace clang diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index ce25ae879b..1a8c2937ec 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -333,6 +333,11 @@ public: void PrintStats(); + /// HandleMicrosoftCommentPaste - When the macro expander pastes together a + /// comment (/##/) in Microsoft mode, this method handles updating the current + /// state, returning the token on the next source line. + void HandleMicrosoftCommentPaste(Token &Tok); + //===--------------------------------------------------------------------===// // Preprocessor callback methods. These are invoked by a lexer as various // directives and events are found. diff --git a/test/Preprocessor/macro_paste_mscomment.c b/test/Preprocessor/macro_paste_mscomment.c new file mode 100644 index 0000000000..7e06b6d375 --- /dev/null +++ b/test/Preprocessor/macro_paste_mscomment.c @@ -0,0 +1,19 @@ +// RUN: clang %s -fsyntax-only -fms-extensions +// This horrible stuff should preprocess into (other than whitespace): +// int foo; +// int bar; +// int baz; + +int foo; + +#define comment /##/ dead tokens live here +comment This is stupidity + +int bar; + +#define nested(x) int x comment cute little dead tokens... 
+ +nested(baz) rise of the dead tokens + +; +