From: Argyrios Kyrtzidis Date: Tue, 13 Nov 2012 01:03:15 +0000 (+0000) Subject: [preprocessor] When #including something that contributes no tokens at all, X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4d10b40ea8ee489c7b9194aa2b025df4ecd2ab01;p=clang [preprocessor] When #including something that contributes no tokens at all, don't recursively continue lexing. This avoids a stack overflow with a sequence of many empty #includes. rdar://11988695 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@167801 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Basic/TokenKinds.def b/include/clang/Basic/TokenKinds.def index 25e8d5a635..704f1d9b32 100644 --- a/include/clang/Basic/TokenKinds.def +++ b/include/clang/Basic/TokenKinds.def @@ -103,6 +103,8 @@ TOK(unknown) // Not a token. TOK(eof) // End of file. TOK(eod) // End of preprocessing directive (end of line inside a // directive). +TOK(included_eof) // End of included file, only if + // PreprocessorLexer::EnableIncludedEOF is true. TOK(code_completion) // Code completion marker TOK(cxx_defaultarg_end) // C++ default argument end marker diff --git a/include/clang/Lex/PreprocessorLexer.h b/include/clang/Lex/PreprocessorLexer.h index 20fb8a0c48..b85729dd08 100644 --- a/include/clang/Lex/PreprocessorLexer.h +++ b/include/clang/Lex/PreprocessorLexer.h @@ -61,6 +61,10 @@ protected: /// Note that in raw mode that the PP pointer may be null. bool LexingRawMode; + /// \brief When true, if EOF of the current lexer is found, tok::included_eof + /// is returned instead of continuing lexing higher in the include stack. + bool EnableIncludedEOF; + /// \brief A state machine that detects the \#ifndef-wrapping a file /// idiom for the multiple-include optimization. MultipleIncludeOpt MIOpt; @@ -79,7 +83,8 @@ protected: : PP(0), InitialNumSLocEntries(0), ParsingPreprocessorDirective(false), ParsingFilename(false), - LexingRawMode(false) {} + LexingRawMode(false), + EnableIncludedEOF(false) {} virtual ~PreprocessorLexer() {} @@ -147,6 +152,11 @@ public: /// \brief Return true if this lexer is in raw mode or not. bool isLexingRawMode() const { return LexingRawMode; } + /// \brief When true, if EOF of the current lexer is found, tok::included_eof + /// is returned instead of continuing lexing higher in the include stack. + /// False is the default behavior. + void setEnableIncludedEOF(bool Enable) { EnableIncludedEOF = Enable; } + /// \brief Return the preprocessor object for this lexer. Preprocessor *getPP() const { return PP; } diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index e6e7ca5ee1..7983a6b3e8 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -2595,8 +2595,14 @@ LexNextToken: // Read the PP instance variable into an automatic variable, because // LexEndOfFile will often delete 'this'. Preprocessor *PPCache = PP; + bool EnableIncludedEOFCache = EnableIncludedEOF; if (LexEndOfFile(Result, CurPtr-1)) // Retreat back into the file. return; // Got a token to return. + + if (EnableIncludedEOFCache) { + Result.setKind(tok::included_eof); + return; + } assert(PPCache && "Raw buffer::LexEndOfFile should return a token"); return PPCache->Lex(Result); } @@ -3234,5 +3240,21 @@ HandleDirective: } goto LexNextToken; // GCC isn't tail call eliminating. } + + if (PreprocessorLexer *PPLex = PP->getCurrentLexer()) { + // If we #include something that contributes no tokens at all, return with + // a tok::included_eof instead of recursively continuing lexing. + // This avoids a stack overflow with a sequence of many empty #includes. + PPLex->setEnableIncludedEOF(true); + PP->Lex(Result); + if (Result.isNot(tok::included_eof)) { + if (Result.isNot(tok::eof) && Result.isNot(tok::eod)) + PPLex->setEnableIncludedEOF(false); + return; + } + if (PP->isCurrentLexer(this)) + goto LexNextToken; + } + return PP->Lex(Result); } diff --git a/lib/Lex/PreprocessorLexer.cpp b/lib/Lex/PreprocessorLexer.cpp index a64c84d6bb..390d4c4523 100644 --- a/lib/Lex/PreprocessorLexer.cpp +++ b/lib/Lex/PreprocessorLexer.cpp @@ -22,7 +22,7 @@ void PreprocessorLexer::anchor() { } PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid) : PP(pp), FID(fid), InitialNumSLocEntries(0), ParsingPreprocessorDirective(false), - ParsingFilename(false), LexingRawMode(false) { + ParsingFilename(false), LexingRawMode(false), EnableIncludedEOF(false) { if (pp) InitialNumSLocEntries = pp->getSourceManager().local_sloc_entry_size(); } diff --git a/test/Index/Inputs/empty.h b/test/Index/Inputs/empty.h new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test/Index/pp-many-includes.c b/test/Index/pp-many-includes.c new file mode 100644 index 0000000000..8a53b4fc64 --- /dev/null +++ b/test/Index/pp-many-includes.c @@ -0,0 +1,208 @@ +// RUN: env LIBCLANG_THREAD_SMALLSTACK=1 c-index-test -index-file %s | FileCheck %s + +// rdar://11988695 +// Check that we don't get stack overflow while including a lot of empty includes. + +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" +#include "Inputs/empty.h" + +// CHECK: [indexDeclaration]: kind: function | name: foo +void foo(); diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 3a6c408bff..bb75db3554 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -6119,6 +6119,8 @@ namespace clang { bool RunSafely(llvm::CrashRecoveryContext &CRC, void (*Fn)(void*), void *UserData, unsigned Size) { + if (getenv("LIBCLANG_THREAD_SMALLSTACK")) + Size = 0x10000; // 64K. if (!Size) Size = GetSafetyThreadStackSize(); if (Size)