From: Eric Liu Date: Fri, 2 Dec 2016 11:01:43 +0000 (+0000) Subject: [ClangFormat] Only insert #include into the #include block in the beginning of the... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ca715740dceadf31185d75d7ca7e91d135d73a22;p=clang [ClangFormat] Only insert #include into the #include block in the beginning of the file. Summary: This avoid inserting #include into: - raw string literals containing #include. - #if block. - Special #include among declarations (e.g. functions). Reviewers: djasper Subscribers: cfe-commits, klimek Differential Revision: https://reviews.llvm.org/D26909 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@288493 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Format/Format.h b/include/clang/Format/Format.h index 2c1eb947ae..d34ca2fee3 100644 --- a/include/clang/Format/Format.h +++ b/include/clang/Format/Format.h @@ -786,7 +786,14 @@ formatReplacements(StringRef Code, const tooling::Replacements &Replaces, /// This also supports inserting/deleting C++ #include directives: /// - If a replacement has offset UINT_MAX, length 0, and a replacement text /// that is an #include directive, this will insert the #include into the -/// correct block in the \p Code. +/// correct block in the \p Code. When searching for points to insert new +/// header, this ignores #include's after the #include block(s) in the +/// beginning of a file to avoid inserting headers into code sections where +/// new #include's should not be added by default. These code sections +/// include: +/// - raw string literals (containing #include). +/// - #if blocks. +/// - Special #include's among declarations (e.g. functions). /// - If a replacement has offset UINT_MAX, length 1, and a replacement text /// that is the name of the header to be removed, the header will be removed /// from \p Code if it exists. diff --git a/lib/Format/Format.cpp b/lib/Format/Format.cpp index 98565e9834..4b24b0b733 100644 --- a/lib/Format/Format.cpp +++ b/lib/Format/Format.cpp @@ -1514,10 +1514,23 @@ inline bool isHeaderDeletion(const tooling::Replacement &Replace) { return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; } -void skipComments(Lexer &Lex, Token &Tok) { - while (Tok.is(tok::comment)) - if (Lex.LexFromRawLexer(Tok)) - return; +// Returns the offset after skipping a sequence of tokens, matched by \p +// GetOffsetAfterSequence, from the start of the code. +// \p GetOffsetAfterSequence should be a function that matches a sequence of +// tokens and returns an offset after the sequence. +unsigned getOffsetAfterTokenSequence( + StringRef FileName, StringRef Code, const FormatStyle &Style, + std::function + GetOffsetAfterSequense) { + std::unique_ptr Env = + Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); + const SourceManager &SourceMgr = Env->getSourceManager(); + Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, + getFormattingLangOpts(Style)); + Token Tok; + // Get the first token. + Lex.LexFromRawLexer(Tok); + return GetOffsetAfterSequense(SourceMgr, Lex, Tok); } // Check if a sequence of tokens is like "# ". If it is, @@ -1527,31 +1540,88 @@ bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) { bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && - Tok.is(tok::raw_identifier); + tok::raw_identifier; if (Matched) Lex.LexFromRawLexer(Tok); return Matched; } +void skipComments(Lexer &Lex, Token &Tok) { + while (Tok.is(tok::comment)) + if (Lex.LexFromRawLexer(Tok)) + return; +} + +// Returns the offset after header guard directives and any comments +// before/after header guards. If no header guard presents in the code, this +// will returns the offset after skipping all comments from the start of the +// code. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, StringRef Code, const FormatStyle &Style) { - std::unique_ptr Env = - Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{}); - const SourceManager &SourceMgr = Env->getSourceManager(); - Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr, - getFormattingLangOpts(Style)); - Token Tok; - // Get the first token. - Lex.LexFromRawLexer(Tok); - skipComments(Lex, Tok); - unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation()); - if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { - skipComments(Lex, Tok); - if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) - return SourceMgr.getFileOffset(Tok.getLocation()); + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); + if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { + skipComments(Lex, Tok); + if (checkAndConsumeDirectiveWithName(Lex, "define", Tok)) + return SM.getFileOffset(Tok.getLocation()); + } + return InitialOffset; + }); +} + +// Check if a sequence of tokens is like +// "#include ("header.h" | )". +// If it is, \p Tok will be the token after this directive; otherwise, it can be +// any token after the given \p Tok (including \p Tok). +bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { + auto Matched = [&]() { + Lex.LexFromRawLexer(Tok); + return true; + }; + if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && + Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { + if (Lex.LexFromRawLexer(Tok)) + return false; + if (Tok.is(tok::string_literal)) + return Matched(); + if (Tok.is(tok::less)) { + while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { + } + if (Tok.is(tok::greater)) + return Matched(); + } } - return AfterComments; + return false; +} + +// Returns the offset of the last #include directive after which a new +// #include can be inserted. This ignores #include's after the #include block(s) +// in the beginning of a file to avoid inserting headers into code sections +// where new #include's should not be added by default. +// These code sections include: +// - raw string literals (containing #include). +// - #if blocks. +// - Special #include's among declarations (e.g. functions). +// +// If no #include after which a new #include can be inserted, this returns the +// offset after skipping all comments from the start of the code. +// Inserting after an #include is not allowed if it comes after code that is not +// #include (e.g. pre-processing directive that is not #include, declarations). +unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, + const FormatStyle &Style) { + return getOffsetAfterTokenSequence( + FileName, Code, Style, + [](const SourceManager &SM, Lexer &Lex, Token Tok) { + skipComments(Lex, Tok); + unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); + while (checkAndConsumeInclusiveDirective(Lex, Tok)) + MaxOffset = SM.getFileOffset(Tok.getLocation()); + return MaxOffset; + }); } bool isDeletedHeader(llvm::StringRef HeaderName, @@ -1560,11 +1630,6 @@ bool isDeletedHeader(llvm::StringRef HeaderName, HeadersToDelete.count(HeaderName.trim("\"<>")); } -// FIXME: we also need to insert a '\n' at the end of the code if we have an -// insertion with offset Code.size(), and there is no '\n' at the end of the -// code. -// FIXME: do not insert headers into conditional #include blocks, e.g. #includes -// surrounded by compile condition "#if...". // FIXME: insert empty lines between newly created blocks. tooling::Replacements fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, @@ -1612,6 +1677,8 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, unsigned MinInsertOffset = getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style); StringRef TrimmedCode = Code.drop_front(MinInsertOffset); + unsigned MaxInsertOffset = + getMaxHeaderInsertionOffset(FileName, TrimmedCode, Style); SmallVector Lines; TrimmedCode.split(Lines, '\n'); unsigned Offset = MinInsertOffset; @@ -1623,11 +1690,14 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, // The header name with quotes or angle brackets. StringRef IncludeName = Matches[2]; ExistingIncludes.insert(IncludeName); - int Category = Categories.getIncludePriority( - IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); - CategoryEndOffsets[Category] = NextLineOffset; - if (FirstIncludeOffset < 0) - FirstIncludeOffset = Offset; + // Only record the offset of current #include if we can insert after it. + if (Offset <= MaxInsertOffset) { + int Category = Categories.getIncludePriority( + IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0); + CategoryEndOffsets[Category] = NextLineOffset; + if (FirstIncludeOffset < 0) + FirstIncludeOffset = Offset; + } if (isDeletedHeader(IncludeName, HeadersToDelete)) { // If this is the last line without trailing newline, we need to make // sure we don't delete across the file boundary. diff --git a/unittests/Format/CleanupTest.cpp b/unittests/Format/CleanupTest.cpp index ddd90487e5..c3e7e2b4c1 100644 --- a/unittests/Format/CleanupTest.cpp +++ b/unittests/Format/CleanupTest.cpp @@ -839,6 +839,93 @@ TEST_F(CleanUpReplacementsTest, InsertionAndDeleteHeader) { EXPECT_EQ(Expected, apply(Code, Replaces)); } +TEST_F(CleanUpReplacementsTest, NoInsertionAfterCode) { + std::string Code = "#include \"a.h\"\n" + "void f() {}\n" + "#include \"b.h\"\n"; + std::string Expected = "#include \"a.h\"\n" + "#include \"c.h\"\n" + "void f() {}\n" + "#include \"b.h\"\n"; + tooling::Replacements Replaces = toReplacements( + {createInsertion("#include \"c.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, NoInsertionInStringLiteral) { + std::string Code = "#include \"a.h\"\n" + "const char[] = R\"(\n" + "#include \"b.h\"\n" + ")\";\n"; + std::string Expected = "#include \"a.h\"\n" + "#include \"c.h\"\n" + "const char[] = R\"(\n" + "#include \"b.h\"\n" + ")\";\n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include \"c.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, NoInsertionAfterOtherDirective) { + std::string Code = "#include \"a.h\"\n" + "#ifdef X\n" + "#include \"b.h\"\n" + "#endif\n"; + std::string Expected = "#include \"a.h\"\n" + "#include \"c.h\"\n" + "#ifdef X\n" + "#include \"b.h\"\n" + "#endif\n"; + tooling::Replacements Replaces = toReplacements( + {createInsertion("#include \"c.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, CanInsertAfterLongSystemInclude) { + std::string Code = "#include \"a.h\"\n" + "// comment\n\n" + "#include \n"; + std::string Expected = "#include \"a.h\"\n" + "// comment\n\n" + "#include \n" + "#include \n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include ")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, CanInsertAfterComment) { + std::string Code = "#include \"a.h\"\n" + "// Comment\n" + "\n" + "/* Comment */\n" + "// Comment\n" + "\n" + "#include \"b.h\"\n"; + std::string Expected = "#include \"a.h\"\n" + "// Comment\n" + "\n" + "/* Comment */\n" + "// Comment\n" + "\n" + "#include \"b.h\"\n" + "#include \"c.h\"\n"; + tooling::Replacements Replaces = + toReplacements({createInsertion("#include \"c.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + +TEST_F(CleanUpReplacementsTest, CanDeleteAfterCode) { + std::string Code = "#include \"a.h\"\n" + "void f() {}\n" + "#include \"b.h\"\n"; + std::string Expected = "#include \"a.h\"\n" + "void f() {}\n"; + tooling::Replacements Replaces = toReplacements({createDeletion("\"b.h\"")}); + EXPECT_EQ(Expected, apply(Code, Replaces)); +} + } // end namespace } // end namespace format } // end namespace clang