From e92b41ea8c68738b6362a31ad98cd1d8894266fb Mon Sep 17 00:00:00 2001 From: Alex Lorenz Date: Mon, 3 Jun 2019 22:59:17 +0000 Subject: [PATCH] Add clang source minimizer that reduces source to directives that might affect the dependency list for a compilation This commit introduces a dependency directives source minimizer to clang that minimizes header and source files to the minimum necessary preprocessor directives for evaluating includes. It reduces the source down to #define, #include, #import, @import, and any conditional preprocessor logic that contains one of those. The source minimizer works by lexing the input with a custom fast lexer that recognizes the preprocessor directives it cares about, and emitting those directives in the minimized source. It ignores source code and comments, and normalizes whitespace. It gives up and fails if it sees any directives that it doesn't recognize as valid (e.g. #define 0). In addition to the source minimizer this patch adds a -print-dependency-directives-minimized-source CC1 option that allows you to invoke the minimizer from clang directly. 
Differential Revision: https://reviews.llvm.org/D55463 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@362459 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/DiagnosticLexKinds.td | 9 + include/clang/Driver/CC1Options.td | 3 + include/clang/Frontend/FrontendActions.h | 11 + include/clang/Frontend/FrontendOptions.h | 5 +- .../Lex/DependencyDirectivesSourceMinimizer.h | 88 ++ lib/Frontend/CompilerInvocation.cpp | 5 + lib/Frontend/FrontendActions.cpp | 33 +- .../ExecuteCompilerInvocation.cpp | 2 + lib/Lex/CMakeLists.txt | 1 + .../DependencyDirectivesSourceMinimizer.cpp | 756 ++++++++++++++++++ ...minimize_source_to_dependency_directives.c | 14 + ...ndency_directives_at_import_extra_tokens.m | 3 + ...ndency_directives_at_import_missing_semi.m | 3 + ...dependency_directives_invalid_macro_name.c | 3 + unittests/Lex/CMakeLists.txt | 1 + ...ependencyDirectivesSourceMinimizerTest.cpp | 508 ++++++++++++ 16 files changed, 1443 insertions(+), 2 deletions(-) create mode 100644 include/clang/Lex/DependencyDirectivesSourceMinimizer.h create mode 100644 lib/Lex/DependencyDirectivesSourceMinimizer.cpp create mode 100644 test/Frontend/minimize_source_to_dependency_directives.c create mode 100644 test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m create mode 100644 test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m create mode 100644 test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c create mode 100644 unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp diff --git a/include/clang/Basic/DiagnosticLexKinds.td b/include/clang/Basic/DiagnosticLexKinds.td index dd5e2af3d2..b64cbc23f8 100644 --- a/include/clang/Basic/DiagnosticLexKinds.td +++ b/include/clang/Basic/DiagnosticLexKinds.td @@ -818,4 +818,13 @@ def err_pp_eof_in_assume_nonnull : Error< } +let CategoryName = "Dependency Directive Source Minimization Issue" in { + +def err_dep_source_minimizer_missing_sema_after_at_import : Error< 
+ "could not find ';' after @import">; +def err_dep_source_minimizer_unexpected_tokens_at_import : Error< + "unexpected extra tokens at end of @import declaration">; + +} + } diff --git a/include/clang/Driver/CC1Options.td b/include/clang/Driver/CC1Options.td index 76b36a1826..56ff05d3e4 100644 --- a/include/clang/Driver/CC1Options.td +++ b/include/clang/Driver/CC1Options.td @@ -612,6 +612,9 @@ def migrate : Flag<["-"], "migrate">, HelpText<"Migrate source code">; def compiler_options_dump : Flag<["-"], "compiler-options-dump">, HelpText<"Dump the compiler configuration options">; +def print_dependency_directives_minimized_source : Flag<["-"], + "print-dependency-directives-minimized-source">, + HelpText<"Print the output of the dependency directives source minimizer">; } def emit_llvm_uselists : Flag<["-"], "emit-llvm-uselists">, diff --git a/include/clang/Frontend/FrontendActions.h b/include/clang/Frontend/FrontendActions.h index e3b8b46165..846b26897c 100644 --- a/include/clang/Frontend/FrontendActions.h +++ b/include/clang/Frontend/FrontendActions.h @@ -240,6 +240,17 @@ protected: bool usesPreprocessorOnly() const override { return true; } }; +class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction { +protected: + void ExecuteAction() override; + std::unique_ptr CreateASTConsumer(CompilerInstance &, + StringRef) override { + return nullptr; + } + + bool usesPreprocessorOnly() const override { return true; } +}; + //===----------------------------------------------------------------------===// // Preprocessor Actions //===----------------------------------------------------------------------===// diff --git a/include/clang/Frontend/FrontendOptions.h b/include/clang/Frontend/FrontendOptions.h index ce0b6964f5..1bbd048967 100644 --- a/include/clang/Frontend/FrontendOptions.h +++ b/include/clang/Frontend/FrontendOptions.h @@ -128,7 +128,10 @@ enum ActionKind { MigrateSource, /// Just lex, no output. 
- RunPreprocessorOnly + RunPreprocessorOnly, + + /// Print the output of the dependency directives source minimizer. + PrintDependencyDirectivesSourceMinimizerOutput }; } // namespace frontend diff --git a/include/clang/Lex/DependencyDirectivesSourceMinimizer.h b/include/clang/Lex/DependencyDirectivesSourceMinimizer.h new file mode 100644 index 0000000000..41641078af --- /dev/null +++ b/include/clang/Lex/DependencyDirectivesSourceMinimizer.h @@ -0,0 +1,88 @@ +//===- clang/Lex/DependencyDirectivesSourceMinimizer.h - ----------*- C++ -*-// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the interface for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H +#define LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { + +class DiagnosticsEngine; + +namespace minimize_source_to_dependency_directives { + +/// Represents the kind of preprocessor directive or a module declaration that +/// is tracked by the source minimizer in its token output. 
+enum TokenKind { + pp_none, + pp_include, + pp___include_macros, + pp_define, + pp_undef, + pp_import, + pp_pragma_import, + pp_include_next, + pp_if, + pp_ifdef, + pp_ifndef, + pp_elif, + pp_else, + pp_endif, + decl_at_import, + pp_eof, +}; + +/// Represents a simplified token that's lexed as part of the source +/// minimization. It's used to track the location of various preprocessor +/// directives that could potentially have an effect on the depedencies. +struct Token { + /// The kind of token. + TokenKind K = pp_none; + + /// Offset into the output byte stream of where the directive begins. + int Offset = -1; + + Token(TokenKind K, int Offset) : K(K), Offset(Offset) {} +}; + +} // end namespace minimize_source_to_dependency_directives + +/// Minimize the input down to the preprocessor directives that might have +/// an effect on the dependencies for a compilation unit. +/// +/// This function deletes all non-preprocessor code, and strips anything that +/// can't affect what gets included. It canonicalizes whitespace where +/// convenient to stabilize the output against formatting changes in the input. +/// +/// Clears the output vectors at the beginning of the call. +/// +/// \returns false on success, true on error. If the diagnostic engine is not +/// null, an appropriate error is reported using the given input location +/// with the offset that corresponds to the minimizer's current buffer offset. 
+bool minimizeSourceToDependencyDirectives( + llvm::StringRef Input, llvm::SmallVectorImpl &Output, + llvm::SmallVectorImpl + &Tokens, + DiagnosticsEngine *Diags = nullptr, + SourceLocation InputSourceLoc = SourceLocation()); + +} // end namespace clang + +#endif // LLVM_CLANG_LEX_DEPENDENCY_DIRECTIVES_SOURCE_MINIMIZER_H diff --git a/lib/Frontend/CompilerInvocation.cpp b/lib/Frontend/CompilerInvocation.cpp index 717278c086..7ac58ae4da 100644 --- a/lib/Frontend/CompilerInvocation.cpp +++ b/lib/Frontend/CompilerInvocation.cpp @@ -1696,6 +1696,10 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args, Opts.ProgramAction = frontend::MigrateSource; break; case OPT_Eonly: Opts.ProgramAction = frontend::RunPreprocessorOnly; break; + case OPT_print_dependency_directives_minimized_source: + Opts.ProgramAction = + frontend::PrintDependencyDirectivesSourceMinimizerOutput; + break; } } @@ -3116,6 +3120,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) { case frontend::PrintPreprocessedInput: case frontend::RewriteMacros: case frontend::RunPreprocessorOnly: + case frontend::PrintDependencyDirectivesSourceMinimizerOutput: return true; } llvm_unreachable("invalid frontend action"); diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp index 9e86323545..7d54d66514 100644 --- a/lib/Frontend/FrontendActions.cpp +++ b/lib/Frontend/FrontendActions.cpp @@ -14,6 +14,7 @@ #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/Utils.h" +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "clang/Lex/PreprocessorOptions.h" @@ -23,8 +24,8 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" #include 
#include @@ -908,3 +909,33 @@ void DumpCompilerOptionsAction::ExecuteAction() { OS << "}"; } + +void PrintDependencyDirectivesSourceMinimizerAction::ExecuteAction() { + CompilerInstance &CI = getCompilerInstance(); + SourceManager &SM = CI.getPreprocessor().getSourceManager(); + const llvm::MemoryBuffer *FromFile = SM.getBuffer(SM.getMainFileID()); + + llvm::SmallString<1024> Output; + llvm::SmallVector Toks; + if (minimizeSourceToDependencyDirectives( + FromFile->getBuffer(), Output, Toks, &CI.getDiagnostics(), + SM.getLocForStartOfFile(SM.getMainFileID()))) { + assert(CI.getDiagnostics().hasErrorOccurred() && + "no errors reported for failure"); + + // Preprocess the source when verifying the diagnostics to capture the + // 'expected' comments. + if (CI.getDiagnosticOpts().VerifyDiagnostics) { + // Make sure we don't emit new diagnostics! + CI.getDiagnostics().setSuppressAllDiagnostics(); + Preprocessor &PP = getCompilerInstance().getPreprocessor(); + PP.EnterMainSourceFile(); + Token Tok; + do { + PP.Lex(Tok); + } while (Tok.isNot(tok::eof)); + } + return; + } + llvm::outs() << Output; +} diff --git a/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/lib/FrontendTool/ExecuteCompilerInvocation.cpp index da7aa7b82a..b6a20a7bb6 100644 --- a/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -116,6 +116,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) { case RunAnalysis: Action = "RunAnalysis"; break; #endif case RunPreprocessorOnly: return llvm::make_unique(); + case PrintDependencyDirectivesSourceMinimizerOutput: + return llvm::make_unique(); } #if !CLANG_ENABLE_ARCMT || !CLANG_ENABLE_STATIC_ANALYZER \ diff --git a/lib/Lex/CMakeLists.txt b/lib/Lex/CMakeLists.txt index 7888b15cb6..d77e6ddb66 100644 --- a/lib/Lex/CMakeLists.txt +++ b/lib/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS support) add_clang_library(clangLex + DependencyDirectivesSourceMinimizer.cpp HeaderMap.cpp HeaderSearch.cpp Lexer.cpp 
diff --git a/lib/Lex/DependencyDirectivesSourceMinimizer.cpp b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp new file mode 100644 index 0000000000..802b7ba159 --- /dev/null +++ b/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -0,0 +1,756 @@ +//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This is the implementation for minimizing header and source files to the +/// minimum necessary preprocessor directives for evaluating includes. It +/// reduces the source down to #define, #include, #import, @import, and any +/// conditional preprocessor logic that contains one of those. +/// +//===----------------------------------------------------------------------===// + +#include "clang/Lex/DependencyDirectivesSourceMinimizer.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Lex/LexDiagnostic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace clang; +using namespace clang::minimize_source_to_dependency_directives; + +namespace { + +struct Minimizer { + /// Minimized output. + SmallVectorImpl &Out; + /// The known tokens encountered during the minimization. + SmallVectorImpl &Tokens; + + Minimizer(SmallVectorImpl &Out, SmallVectorImpl &Tokens, + StringRef Input, DiagnosticsEngine *Diags, + SourceLocation InputSourceLoc) + : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags), + InputSourceLoc(InputSourceLoc) {} + + /// Lex the provided source and emit the minimized output. + /// + /// \returns True on error. 
+ bool minimize(); + +private: + struct IdInfo { + const char *Last; + StringRef Name; + }; + + /// Lex an identifier. + /// + /// \pre First points at a valid identifier head. + LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End); + LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First, + const char *const End); + LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End); + LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexAt(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End); + LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End); + LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End); + LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive, + const char *&First, const char *const End); + Token &makeToken(TokenKind K) { + Tokens.emplace_back(K, Out.size()); + return Tokens.back(); + } + void popToken() { + Out.resize(Tokens.back().Offset); + Tokens.pop_back(); + } + TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; } + + Minimizer &put(char Byte) { + Out.push_back(Byte); + return *this; + } + Minimizer &append(StringRef S) { return append(S.begin(), S.end()); } + Minimizer &append(const char *First, const char *Last) { + Out.append(First, Last); + return *this; + } + + void printToNewline(const char *&First, const char *const End); + void printAdjacentModuleNameParts(const char *&First, const char *const End); + LLVM_NODISCARD bool printAtImportBody(const char *&First, + const char *const End); + void printDirectiveBody(const char *&First, const char *const End); + void printAdjacentMacroArgs(const char *&First, const char *const End); + LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End); + + /// Reports a diagnostic if the diagnostic engine is provided. 
Always returns + /// true at the end. + bool reportError(const char *CurPtr, unsigned Err); + + StringMap SplitIds; + StringRef Input; + DiagnosticsEngine *Diags; + SourceLocation InputSourceLoc; +}; + +} // end anonymous namespace + +bool Minimizer::reportError(const char *CurPtr, unsigned Err) { + if (!Diags) + return true; + assert(CurPtr >= Input.data() && "invalid buffer ptr"); + Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err); + return true; +} + +static void skipOverSpaces(const char *&First, const char *const End) { + while (First != End && isHorizontalWhitespace(*First)) + ++First; +} + +LLVM_NODISCARD static bool isRawStringLiteral(const char *First, + const char *Current) { + assert(First <= Current); + + // Check if we can even back up. + if (*Current != '\"' || First == Current) + return false; + + // Check for an "R". + --Current; + if (*Current != 'R') + return false; + if (First == Current || !isIdentifierBody(*--Current)) + return true; + + // Check for a prefix of "u", "U", or "L". + if (*Current == 'u' || *Current == 'U' || *Current == 'L') + return First == Current || !isIdentifierBody(*--Current); + + // Check for a prefix of "u8". + if (*Current != '8' || First == Current || *Current-- != 'u') + return false; + return First == Current || !isIdentifierBody(*--Current); +} + +static void skipRawString(const char *&First, const char *const End) { + assert(First[0] == '\"'); + assert(First[-1] == 'R'); + + const char *Last = ++First; + while (Last != End && *Last != '(') + ++Last; + if (Last == End) { + First = Last; // Hit the end... just give up. + return; + } + + StringRef Terminator(First, Last - First); + for (;;) { + // Move First to just past the next ")". + First = Last; + while (First != End && *First != ')') + ++First; + if (First == End) + return; + ++First; + + // Look ahead for the terminator sequence. 
+ Last = First; + while (Last != End && size_t(Last - First) < Terminator.size() && + Terminator[Last - First] == *Last) + ++Last; + + // Check if we hit it (or the end of the file). + if (Last == End) { + First = Last; + return; + } + if (size_t(Last - First) < Terminator.size()) + continue; + if (*Last != '\"') + continue; + First = Last + 1; + return; + } +} + +static void skipString(const char *&First, const char *const End) { + assert(*First == '\'' || *First == '\"'); + const char Terminator = *First; + for (++First; First != End && *First != Terminator; ++First) + if (*First == '\\') + if (++First == End) + return; + if (First != End) + ++First; // Finish off the string. +} + +static void skipNewline(const char *&First, const char *End) { + assert(isVerticalWhitespace(*First)); + ++First; + if (First == End) + return; + + // Check for "\n\r" and "\r\n". + if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0])) + ++First; +} + +static void skipToNewlineRaw(const char *&First, const char *const End) { + for (;;) { + if (First == End) + return; + + if (isVerticalWhitespace(*First)) + return; + + while (!isVerticalWhitespace(*First)) + if (++First == End) + return; + + if (First[-1] != '\\') + return; + + ++First; // Keep going... 
+ } +} + +static const char *reverseOverSpaces(const char *First, const char *Last) { + assert(First <= Last); + while (First != Last && isHorizontalWhitespace(Last[-1])) + --Last; + return Last; +} + +static void skipLineComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '/'); + First += 2; + skipToNewlineRaw(First, End); +} + +static void skipBlockComment(const char *&First, const char *const End) { + assert(First[0] == '/' && First[1] == '*'); + if (End - First < 4) { + First = End; + return; + } + for (First += 3; First != End; ++First) + if (First[-1] == '*' && First[0] == '/') { + ++First; + return; + } +} + +/// \returns True if the current single quotation mark character is a C++ 14 +/// digit separator. +static bool isQuoteCppDigitSeparator(const char *const Start, + const char *const Cur, + const char *const End) { + assert(*Cur == '\'' && "expected quotation character"); + // skipLine called in places where we don't expect a valid number + // body before `start` on the same line, so always return false at the start. + if (Start == Cur) + return false; + // The previous character must be a valid PP number character. + if (!isPreprocessingNumberBody(*(Cur - 1))) + return false; + // The next character should be a valid identifier body character. + return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); +} + +static void skipLine(const char *&First, const char *const End) { + do { + assert(First <= End); + if (First == End) + return; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + return; + } + const char *Start = First; + while (First != End && !isVerticalWhitespace(*First)) { + // Iterate over strings correctly to avoid comments and newlines. + if (*First == '\"' || + (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) { + if (isRawStringLiteral(Start, First)) + skipRawString(First, End); + else + skipString(First, End); + continue; + } + + // Iterate over comments correctly. 
+ if (*First != '/' || End - First < 2) { + ++First; + continue; + } + + if (First[1] == '/') { + // "//...". + skipLineComment(First, End); + continue; + } + + if (First[1] != '*') { + ++First; + continue; + } + + // "/*...*/". + skipBlockComment(First, End); + } + if (First == End) + return; + + // Skip over the newline. + assert(isVerticalWhitespace(*First)); + skipNewline(First, End); + } while (First[-2] == '\\'); // Continue past line-continuations. +} + +static void skipDirective(StringRef Name, const char *&First, + const char *const End) { + if (llvm::StringSwitch(Name) + .Case("warning", true) + .Case("error", true) + .Default(false)) + // Do not process quotes or comments. + skipToNewlineRaw(First, End); + else + skipLine(First, End); +} + +void Minimizer::printToNewline(const char *&First, const char *const End) { + while (First != End && !isVerticalWhitespace(*First)) { + const char *Last = First; + do { + // Iterate over strings correctly to avoid comments and newlines. + if (*Last == '\"' || *Last == '\'') { + if (LLVM_UNLIKELY(isRawStringLiteral(First, Last))) + skipRawString(Last, End); + else + skipString(Last, End); + continue; + } + if (*Last != '/' || End - Last < 2) { + ++Last; + continue; // Gather the rest up to print verbatim. + } + + if (Last[1] != '/' && Last[1] != '*') { + ++Last; + continue; + } + + // Deal with "//..." and "/*...*/". + append(First, reverseOverSpaces(First, Last)); + First = Last; + + if (Last[1] == '/') { + skipLineComment(First, End); + return; + } + + put(' '); + skipBlockComment(First, End); + skipOverSpaces(First, End); + Last = First; + } while (Last != End && !isVerticalWhitespace(*Last)); + + // Print out the string. + if (Last == End || Last == First || Last[-1] != '\\') { + append(First, reverseOverSpaces(First, Last)); + return; + } + + // Print up to the backslash, backing up over spaces. 
+ append(First, reverseOverSpaces(First, Last - 1)); + + First = Last; + skipNewline(First, End); + skipOverSpaces(First, End); + } +} + +static void skipWhitespace(const char *&First, const char *const End) { + for (;;) { + assert(First <= End); + skipOverSpaces(First, End); + + if (End - First < 2) + return; + + if (First[0] == '\\' && isVerticalWhitespace(First[1])) { + skipNewline(++First, End); + continue; + } + + // Check for a non-comment character. + if (First[0] != '/') + return; + + // "// ...". + if (First[1] == '/') { + skipLineComment(First, End); + return; + } + + // Cannot be a comment. + if (First[1] != '*') + return; + + // "/*...*/". + skipBlockComment(First, End); + } +} + +void Minimizer::printAdjacentModuleNameParts(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && (isIdentifierBody(*Last) || *Last == '.')); + append(First, Last); + First = Last; +} + +bool Minimizer::printAtImportBody(const char *&First, const char *const End) { + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (isVerticalWhitespace(*First)) { + skipNewline(First, End); + continue; + } + + // Found a semicolon. + if (*First == ';') { + put(*First++).put('\n'); + return false; + } + + // Don't handle macro expansions inside @import for now. + if (!isIdentifierBody(*First) && *First != '.') + return true; + + printAdjacentModuleNameParts(First, End); + } +} + +void Minimizer::printDirectiveBody(const char *&First, const char *const End) { + skipWhitespace(First, End); // Skip initial whitespace. 
+ printToNewline(First, End); + while (Out.back() == ' ') + Out.pop_back(); + put('\n'); +} + +LLVM_NODISCARD static const char *lexRawIdentifier(const char *First, + const char *const End) { + assert(isIdentifierBody(*First) && "invalid identifer"); + const char *Last = First + 1; + while (Last != End && isIdentifierBody(*Last)) + ++Last; + return Last; +} + +LLVM_NODISCARD static const char * +getIdentifierContinuation(const char *First, const char *const End) { + if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1])) + return nullptr; + + ++First; + skipNewline(First, End); + if (First == End) + return nullptr; + return isIdentifierBody(First[0]) ? First : nullptr; +} + +Minimizer::IdInfo Minimizer::lexIdentifier(const char *First, + const char *const End) { + const char *Last = lexRawIdentifier(First, End); + const char *Next = getIdentifierContinuation(Last, End); + if (LLVM_LIKELY(!Next)) + return IdInfo{Last, StringRef(First, Last - First)}; + + // Slow path, where identifiers are split over lines. + SmallVector Id(First, Last); + while (Next) { + Last = lexRawIdentifier(Next, End); + Id.append(Next, Last); + Next = getIdentifierContinuation(Last, End); + } + return IdInfo{ + Last, + SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()}; +} + +void Minimizer::printAdjacentMacroArgs(const char *&First, + const char *const End) { + // Skip over parts of the body. + const char *Last = First; + do + ++Last; + while (Last != End && + (isIdentifierBody(*Last) || *Last == '.' || *Last == ',')); + append(First, Last); + First = Last; +} + +bool Minimizer::printMacroArgs(const char *&First, const char *const End) { + assert(*First == '('); + put(*First++); + for (;;) { + skipWhitespace(First, End); + if (First == End) + return true; + + if (*First == ')') { + put(*First++); + return false; + } + + // This is intentionally fairly liberal. + if (!(isIdentifierBody(*First) || *First == '.' 
|| *First == ',')) + return true; + + printAdjacentMacroArgs(First, End); + } +} + +/// Looks for an identifier starting from Last. +/// +/// Updates "First" to just past the next identifier, if any. Returns true iff +/// the identifier matches "Id". +bool Minimizer::isNextIdentifier(StringRef Id, const char *&First, + const char *const End) { + skipWhitespace(First, End); + if (First == End || !isIdentifierHead(*First)) + return false; + + IdInfo FoundId = lexIdentifier(First, End); + First = FoundId.Last; + return FoundId.Name == Id; +} + +bool Minimizer::lexAt(const char *&First, const char *const End) { + // Handle "@import". + const char *ImportLoc = First++; + if (!isNextIdentifier("import", First, End)) { + skipLine(First, End); + return false; + } + makeToken(decl_at_import); + append("@import "); + if (printAtImportBody(First, End)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import); + skipWhitespace(First, End); + if (First == End) + return false; + if (!isVerticalWhitespace(*First)) + return reportError( + ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import); + skipNewline(First, End); + return false; +} + +bool Minimizer::lexDefine(const char *&First, const char *const End) { + makeToken(pp_define); + append("#define "); + skipWhitespace(First, End); + + if (!isIdentifierHead(*First)) + return reportError(First, diag::err_pp_macro_not_identifier); + + IdInfo Id = lexIdentifier(First, End); + const char *Last = Id.Last; + append(Id.Name); + if (Last == End) + return false; + if (*Last == '(') { + size_t Size = Out.size(); + if (printMacroArgs(Last, End)) { + // Be robust to bad macro arguments, since they can show up in disabled + // code. 
+      Out.resize(Size);
+      append("(/* invalid */\n");
+      skipLine(Last, End);
+      return false;
+    }
+  }
+  skipWhitespace(Last, End);
+  if (Last == End)
+    return false;
+  if (!isVerticalWhitespace(*Last))
+    put(' ');
+  printDirectiveBody(Last, End);
+  First = Last;
+  return false;
+}
+
+/// Lex the remainder of a "#pragma" line (lexPPLine calls this after it has
+/// consumed the "pragma" identifier).
+///
+/// Only "#pragma clang module import" is kept. As soon as one of the expected
+/// identifiers ("clang", "module", "import") is missing, the rest of the line
+/// is skipped and nothing is emitted.
+///
+/// \returns false on every path; this function never reports an error.
+bool Minimizer::lexPragma(const char *&First, const char *const End) {
+  // #pragma.
+  if (!isNextIdentifier("clang", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang.
+  if (!isNextIdentifier("module", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module.
+  if (!isNextIdentifier("import", First, End)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // #pragma clang module import.
+  makeToken(pp_pragma_import);
+  append("#pragma clang module import ");
+  printDirectiveBody(First, End);
+  return false;
+}
+
+/// Handle "#endif", dropping the enclosing conditional when it is empty.
+///
+/// If the most recent tokens are only an "#else", any number of "#elif"s and
+/// the opening "#if"/"#ifdef"/"#ifndef", they are all popped and the "#endif"
+/// line itself is skipped. Otherwise "#endif" is emitted via lexDefault().
+///
+/// NOTE(review): popToken() presumably also rolls back the text emitted for
+/// the popped directive -- confirm against its definition.
+///
+/// \returns false on every path.
+bool Minimizer::lexEndif(const char *&First, const char *const End) {
+  // Strip out "#else" if it's empty.
+  if (top() == pp_else)
+    popToken();
+
+  // Strip out "#elif" if they're empty.
+  while (top() == pp_elif)
+    popToken();
+
+  // If "#if" is empty, strip it and skip the "#endif".
+  if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
+    popToken();
+    skipLine(First, End);
+    return false;
+  }
+
+  return lexDefault(pp_endif, "endif", First, End);
+}
+
+/// Emit a directive that needs no special handling.
+///
+/// Records a token of kind \p Kind, writes '#', \p Directive and a single
+/// space, then prints the directive body starting at \p First.
+///
+/// \returns false on every path.
+bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
+                           const char *&First, const char *const End) {
+  makeToken(Kind);
+  put('#').append(Directive).put(' ');
+  printDirectiveBody(First, End);
+  return false;
+}
+
+/// Lex one source line starting at \p First and emit it if it is a directive
+/// the minimizer keeps.
+///
+/// Lines whose first non-whitespace character is neither '#' nor '@' are
+/// skipped. '@' lines are forwarded to lexAt(). For '#' lines the directive
+/// name is looked up; unknown names go to skipDirective(), and "endif",
+/// "define" and "pragma" have dedicated lexers. Everything else is emitted
+/// through lexDefault().
+///
+/// \pre First != End.
+/// \returns true when an error was reported (minimizeImpl() stops on a true
+/// result), false otherwise.
+bool Minimizer::lexPPLine(const char *&First, const char *const End) {
+  assert(First != End);
+
+  skipWhitespace(First, End);
+  assert(First <= End);
+  if (First == End)
+    return false;
+
+  if (*First != '#' && *First != '@') {
+    skipLine(First, End);
+    assert(First <= End);
+    return false;
+  }
+
+  // Handle "@import".
+  if (*First == '@')
+    return lexAt(First, End);
+
+  // Handle preprocessing directives.
+  ++First; // Skip over '#'.
+  skipWhitespace(First, End);
+
+  // A lone '#' at the very end of the input is reported as an error.
+  if (First == End)
+    return reportError(First, diag::err_pp_expected_eol);
+
+  // '#' followed by something that cannot start a directive name: ignore it.
+  if (!isIdentifierHead(*First)) {
+    skipLine(First, End);
+    return false;
+  }
+
+  // Figure out the token. The result type is spelled out because StringSwitch
+  // cannot deduce it from its StringRef argument.
+  IdInfo Id = lexIdentifier(First, End);
+  First = Id.Last;
+  auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
+                  .Case("include", pp_include)
+                  .Case("__include_macros", pp___include_macros)
+                  .Case("define", pp_define)
+                  .Case("undef", pp_undef)
+                  .Case("import", pp_import)
+                  .Case("include_next", pp_include_next)
+                  .Case("if", pp_if)
+                  .Case("ifdef", pp_ifdef)
+                  .Case("ifndef", pp_ifndef)
+                  .Case("elif", pp_elif)
+                  .Case("else", pp_else)
+                  .Case("endif", pp_endif)
+                  .Case("pragma", pp_pragma_import)
+                  .Default(pp_none);
+  if (Kind == pp_none) {
+    skipDirective(Id.Name, First, End);
+    return false;
+  }
+
+  if (Kind == pp_endif)
+    return lexEndif(First, End);
+
+  if (Kind == pp_define)
+    return lexDefine(First, End);
+
+  // Every "#pragma" maps to pp_pragma_import here; lexPragma() decides
+  // whether it really is a module import.
+  if (Kind == pp_pragma_import)
+    return lexPragma(First, End);
+
+  // Everything else.
+  return lexDefault(Kind, Id.Name, First, End);
+}
+
+/// Run lexPPLine() over [First, End) until the input is exhausted or a line
+/// reports an error.
+///
+/// \returns true if some line reported an error, false otherwise.
+bool Minimizer::minimizeImpl(const char *First, const char *const End) {
+  while (First != End)
+    if (lexPPLine(First, End))
+      return true;
+  return false;
+}
+
+/// Minimize the whole input buffer.
+///
+/// On success the output is guaranteed to end in a newline (unless it is
+/// empty) and a pp_eof token is recorded. In both the success and the error
+/// case a '\0' is written just past the end of Out without being counted in
+/// its size.
+///
+/// \returns true on error, false on success.
+bool Minimizer::minimize() {
+  bool Error = minimizeImpl(Input.begin(), Input.end());
+
+  if (!Error) {
+    // Add a trailing newline and an EOF on success.
+    if (!Out.empty() && Out.back() != '\n')
+      Out.push_back('\n');
+    makeToken(pp_eof);
+  }
+
+  // Null-terminate the output. This way the memory buffer that's passed to
+  // Clang will not have to worry about the terminating '\0'.
+  Out.push_back(0);
+  Out.pop_back();
+  return Error;
+}
+
+/// Public entry point: clear \p Output and \p Tokens, then minimize \p Input
+/// into them.
+///
+/// NOTE(review): the element types of the two vectors were restored from how
+/// they are used (Output receives characters; tokens expose a TokenKind).
+/// Confirm the spelling against the declaration in
+/// clang/Lex/DependencyDirectivesSourceMinimizer.h.
+///
+/// \returns true on error, false on success (the result of
+/// Minimizer::minimize()).
+bool clang::minimizeSourceToDependencyDirectives(
+    StringRef Input, SmallVectorImpl<char> &Output,
+    SmallVectorImpl<minimize_source_to_dependency_directives::Token> &Tokens,
+    DiagnosticsEngine *Diags, SourceLocation InputSourceLoc) {
+  Output.clear();
+  Tokens.clear();
+  return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
+}
diff --git a/test/Frontend/minimize_source_to_dependency_directives.c b/test/Frontend/minimize_source_to_dependency_directives.c
new file mode 100644
index 0000000000..39f608b264
--- /dev/null
+++ b/test/Frontend/minimize_source_to_dependency_directives.c
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -print-dependency-directives-minimized-source %s > %t
+// RUN: echo END. >> %t
+// RUN: FileCheck < %t %s
+
+#ifdef FOO
+#include "a.h"
+#else
+void skipThisCode();
+#endif
+
+// CHECK: #ifdef FOO
+// CHECK-NEXT: #include "a.h"
+// CHECK-NEXT: #endif
+// CHECK-NEXT: END.
diff --git a/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m b/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
new file mode 100644
index 0000000000..ef210af5c5
--- /dev/null
+++ b/test/Lexer/minimize_source_to_dependency_directives_at_import_extra_tokens.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x; a // expected-error {{unexpected extra tokens at end of @import declaration}}
diff --git a/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m b/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
new file mode 100644
index 0000000000..8962e31a05
--- /dev/null
+++ b/test/Lexer/minimize_source_to_dependency_directives_at_import_missing_semi.m
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1
+
+@import x // expected-error {{could not find ';' after @import}}
diff --git a/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c
b/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c new file mode 100644 index 0000000000..fa4ff7dcb8 --- /dev/null +++ b/test/Lexer/minimize_source_to_dependency_directives_invalid_macro_name.c @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -verify -print-dependency-directives-minimized-source %s 2>&1 + +#define 0 0 // expected-error {{macro name must be an identifier}} diff --git a/unittests/Lex/CMakeLists.txt b/unittests/Lex/CMakeLists.txt index bb0f66d860..dbc8328eda 100644 --- a/unittests/Lex/CMakeLists.txt +++ b/unittests/Lex/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS ) add_clang_unittest(LexTests + DependencyDirectivesSourceMinimizerTest.cpp HeaderMapTest.cpp HeaderSearchTest.cpp LexerTest.cpp diff --git a/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp b/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp new file mode 100644 index 0000000000..7feb6c97fd --- /dev/null +++ b/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -0,0 +1,508 @@ +//===- unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
+#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace clang;
+using namespace clang::minimize_source_to_dependency_directives;
+
+// NOTE(review): this listing lost every "<...>" span. The SmallVector and
+// SmallVectorImpl element types below were restored from usage; the inline
+// capacities (128 / 4 / 32) are a tuning choice and should be confirmed. The
+// angle-bracket operands of the "#include"-style directives inside the string
+// literals could not be recovered and are left exactly as found.
+
+namespace clang {
+
+// Convenience overload for tests that only inspect the minimized text; the
+// token list is collected and discarded.
+bool minimizeSourceToDependencyDirectives(StringRef Input,
+                                          SmallVectorImpl<char> &Out) {
+  SmallVector<minimize_source_to_dependency_directives::Token, 32> Tokens;
+  return minimizeSourceToDependencyDirectives(Input, Out, Tokens);
+}
+
+} // end namespace clang
+
+namespace {
+
+// Input without directives yields empty output and a single pp_eof token.
+TEST(MinimizeSourceToDependencyDirectivesTest, Empty) {
+  SmallVector<char, 128> Out;
+  SmallVector<minimize_source_to_dependency_directives::Token, 4> Tokens;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens));
+  EXPECT_TRUE(Out.empty());
+  ASSERT_EQ(1u, Tokens.size());
+  ASSERT_EQ(pp_eof, Tokens.back().K);
+}
+
+// Every recognized directive produces its token kind, in source order.
+TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
+  SmallVector<char, 128> Out;
+  SmallVector<minimize_source_to_dependency_directives::Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define A\n"
+                                           "#undef A\n"
+                                           "#endif\n"
+                                           "#if A\n"
+                                           "#ifdef A\n"
+                                           "#ifndef A\n"
+                                           "#elif A\n"
+                                           "#else\n"
+                                           "#include \n"
+                                           "#include_next \n"
+                                           "#__include_macros \n"
+                                           "#import \n"
+                                           "@import A;\n"
+                                           "#pragma clang module import A\n",
+                                           Out, Tokens));
+  EXPECT_EQ(pp_define, Tokens[0].K);
+  EXPECT_EQ(pp_undef, Tokens[1].K);
+  EXPECT_EQ(pp_endif, Tokens[2].K);
+  EXPECT_EQ(pp_if, Tokens[3].K);
+  EXPECT_EQ(pp_ifdef, Tokens[4].K);
+  EXPECT_EQ(pp_ifndef, Tokens[5].K);
+  EXPECT_EQ(pp_elif, Tokens[6].K);
+  EXPECT_EQ(pp_else, Tokens[7].K);
+  EXPECT_EQ(pp_include, Tokens[8].K);
+  EXPECT_EQ(pp_include_next, Tokens[9].K);
+  EXPECT_EQ(pp___include_macros, Tokens[10].K);
+  EXPECT_EQ(pp_import, Tokens[11].K);
+  EXPECT_EQ(decl_at_import, Tokens[12].K);
+  EXPECT_EQ(pp_pragma_import, Tokens[13].K);
+  EXPECT_EQ(pp_eof, Tokens[14].K);
+}
+
+// A bare "#define" gets a trailing newline and a pp_define token.
+TEST(MinimizeSourceToDependencyDirectivesTest, Define) {
+  SmallVector<char, 128> Out;
+  SmallVector<minimize_source_to_dependency_directives::Token, 4> Tokens;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO", Out, Tokens));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+  ASSERT_EQ(2u, Tokens.size());
+  ASSERT_EQ(pp_define, Tokens.front().K);
+}
+
+// Trailing whitespace and blank lines after a "#define" are dropped.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineSpacing) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a \n\n\n", Out));
+  EXPECT_STREQ("#define MACRO a\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO\n\n\n", Out));
+  EXPECT_STREQ("#define MACRO\n", Out.data());
+}
+
+// Function-like macros: the parameter list is printed without spaces.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO()", Out));
+  EXPECT_STREQ("#define MACRO()\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, b...)", Out));
+  EXPECT_STREQ("#define MACRO(a,b...)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO content", Out));
+  EXPECT_STREQ("#define MACRO content\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO con tent ", Out));
+  EXPECT_STREQ("#define MACRO con tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO() con tent ", Out));
+  EXPECT_STREQ("#define MACRO() con tent\n", Out.data());
+}
+
+// Malformed parameter lists are not an error; they are replaced by a marker.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineInvalidMacroArguments) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO((a))", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO(", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a * b)", Out));
+  EXPECT_STREQ("#define MACRO(/* invalid */\n", Out.data());
+}
+
+// Tabs, form feeds and vertical tabs count as horizontal whitespace.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineHorizontalWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\t)\tcon \t tent\t", Out));
+  EXPECT_STREQ("#define MACRO() con \t tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\f)\fcon \f tent\f", Out));
+  EXPECT_STREQ("#define MACRO() con \f tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO(\v)\vcon \v tent\v", Out));
+  EXPECT_STREQ("#define MACRO() con \v tent\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#define MACRO \t\v\f\v\t con\f\t\vtent\v\f \v", Out));
+  EXPECT_STREQ("#define MACRO con\f\t\vtent\n", Out.data());
+}
+
+// Backslash-newline continuations inside a "#define" are joined.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineMultilineArgs) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a \\\n"
+                                           " )",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a)\n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, \\\n"
+                                           " b) \\\n"
+                                           " call((a), \\\n"
+                                           " (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+// Same as DefineMultilineArgs, with "\r" as the line terminator.
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, \\\r"
+                                           " b) \\\r"
+                                           " call((a), \\\r"
+                                           " (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+// Same as DefineMultilineArgs, with "\r\n" as the line terminator.
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsCarriageReturnNewline) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, \\\r\n"
+                                           " b) \\\r\n"
+                                           " call((a), \\\r\n"
+                                           " (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+// Same as DefineMultilineArgs, with "\n\r" as the line terminator.
+TEST(MinimizeSourceToDependencyDirectivesTest,
+     DefineMultilineArgsNewlineCarriageReturn) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO(a, \\\n\r"
+                                           " b) \\\n\r"
+                                           " call((a), \\\n\r"
+                                           " (b))",
+                                           Out));
+  EXPECT_STREQ("#define MACRO(a,b) call((a),(b))\n", Out.data());
+}
+
+// A macro name that starts with a digit is an error.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNumber) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define 0\n", Out));
+}
+
+// A "#define" with punctuation where the name should be is an error.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoName) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("#define &\n", Out));
+}
+
+// A space is inserted between the macro name and a body that follows it
+// without whitespace.
+TEST(MinimizeSourceToDependencyDirectivesTest, DefineNoWhitespace) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND&\n", Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define AND\\\n"
+                                                    "&\n",
+                                                    Out));
+  EXPECT_STREQ("#define AND &\n", Out.data());
+}
+
+// Directives inside a block comment are ignored; comments are stripped from
+// directive bodies.
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineComment) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#define MACRO a/*\n"
+                                           " /*\n"
+                                           "#define MISSING abc\n"
+                                           " /*\n"
+                                           " /* something */ \n"
+                                           "#include /* \"def\" */ \n",
+                                           Out));
+  EXPECT_STREQ("#define MACRO a\n"
+               "#include \n",
+               Out.data());
+}
+
+// Comment delimiters inside string literals do not open or close a comment.
+TEST(MinimizeSourceToDependencyDirectivesTest, MultilineCommentInStrings) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define MACRO1 \"/*\"\n"
+                                                    "#define MACRO2 \"*/\"\n",
+                                                    Out));
+  EXPECT_STREQ("#define MACRO1 \"/*\"\n"
+               "#define MACRO2 \"*/\"\n",
+               Out.data());
+}
+
+// Non-empty conditional blocks are emitted unchanged.
+TEST(MinimizeSourceToDependencyDirectivesTest, Ifdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#define B\n"
+                                                    "#elif B\n"
+                                                    "#define C\n"
+                                                    "#elif C\n"
+                                                    "#define D\n"
+                                                    "#else\n"
+                                                    "#define E\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifdef A\n"
+               "#define B\n"
+               "#elif B\n"
+               "#define C\n"
+               "#elif C\n"
+               "#define D\n"
+               "#else\n"
+               "#define E\n"
+               "#endif\n",
+               Out.data());
+}
+
+// A conditional whose branches are all empty disappears entirely.
+TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIfdef) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifdef A\n"
+                                                    "#elif B\n"
+                                                    "#elif C\n"
+                                                    "#else D\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("", Out.data());
+}
+
+// Only "#pragma clang module import" survives; other pragmas are dropped.
+TEST(MinimizeSourceToDependencyDirectivesTest, Pragma) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module impor\n", Out));
+  EXPECT_STREQ("", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#pragma clang module import\n", Out));
+  EXPECT_STREQ("#pragma clang module import\n", Out.data());
+}
+
+// The include-style directives are passed through.
+TEST(MinimizeSourceToDependencyDirectivesTest, Include) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \"A\"\n", Out));
+  EXPECT_STREQ("#include \"A\"\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#include \n", Out));
+  EXPECT_STREQ("#include \n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#include_next \n", Out));
+  EXPECT_STREQ("#include_next \n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#import \n", Out));
+  EXPECT_STREQ("#import \n", Out.data());
+
+  ASSERT_FALSE(
+      minimizeSourceToDependencyDirectives("#__include_macros \n", Out));
+  EXPECT_STREQ("#__include_macros \n", Out.data());
+}
+
+// "@import" declarations are normalized to "@import <path>;".
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out));
+  EXPECT_STREQ("@import A;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out));
+  EXPECT_STREQ("@import A.B;\n", Out.data());
+}
+
+// Malformed "@import" declarations are reported as errors.
+TEST(MinimizeSourceToDependencyDirectivesTest, AtImportFailures) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import A\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import MACRO(A);\n", Out));
+  ASSERT_TRUE(minimizeSourceToDependencyDirectives("@import \" \";\n", Out));
+}
+
+// Raw string literals in ordinary code are skipped, even when their contents
+// contain '#', '@' or quote characters.
+TEST(MinimizeSourceToDependencyDirectivesTest, RawStringLiteral) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#ifndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "R\"()\"\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"(-?:\,[]{}#&*!|>'"%@`)";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(
+      "#ifndef GUARD\n"
+      "#define GUARD\n"
+      R"raw(static constexpr char bytes[] = R"abc(-?:\,[]{}#&*!|>'"%@`)abc";)raw"
+      "\n"
+      "#endif\n",
+      Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+}
+
+// A line continuation inside an identifier joins the two halves; whitespace
+// after the continuation keeps them apart.
+TEST(MinimizeSourceToDependencyDirectivesTest, SplitIdentifier) {
+  SmallVector<char, 128> Out;
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#if\\\n"
+                                                    "ndef GUARD\n"
+                                                    "#define GUARD\n"
+                                                    "#endif\n",
+                                                    Out));
+  EXPECT_STREQ("#ifndef GUARD\n"
+               "#define GUARD\n"
+               "#endif\n",
+               Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\r"
+                                                    "RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUARD\n", Out.data());
+
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives("#define GUA\\\n"
+                                                    " RD\n",
+                                                    Out));
+  EXPECT_STREQ("#define GUA RD\n", Out.data());
+}
+
+// "#warning" and "#error" lines are dropped; unterminated quotes or comments
+// on them must not swallow the directives that follow.
+TEST(MinimizeSourceToDependencyDirectivesTest, PoundWarningAndError) {
+  SmallVector<char, 128> Out;
+
+  for (auto Source : {
+           "#warning '\n#include \n",
+           "#warning \"\n#include \n",
+           "#warning /*\n#include \n",
+           "#warning \\\n#include \n#include \n",
+           "#error '\n#include \n",
+           "#error \"\n#include \n",
+           "#error /*\n#include \n",
+           "#error \\\n#include \n#include \n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("#include \n", Out.data());
+  }
+
+  for (auto Source : {
+           "#warning \\\n#include \n",
+           "#error \\\n#include \n",
+           "#if MACRO\n#warning '\n#endif\n",
+           "#if MACRO\n#warning \"\n#endif\n",
+           "#if MACRO\n#warning /*\n#endif\n",
+           "#if MACRO\n#error '\n#endif\n",
+           "#if MACRO\n#error \"\n#endif\n",
+           "#if MACRO\n#error /*\n#endif\n",
+       }) {
+    ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+    EXPECT_STREQ("", Out.data());
+  }
+}
+
+// Apostrophes used as digit separators or in character literals do not hide
+// the directives that follow them.
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteral) {
+  SmallVector<char, 128> Out;
+
+  StringRef Source = R"(
+#include
+int a = 0'1;
+int b = 0xfa'af'fa;
+int c = 12 ' ';
+#include
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n#include \n", Out.data());
+}
+
+} // end anonymous namespace
-- 
2.40.0