From: Russell Gallop
Date: Wed, 22 May 2019 12:50:52 +0000 (+0000)
Subject: Revert r361148 "[Syntax] Introduce TokenBuffer, start clangToolingSyntax library"
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22432f5ef1df6e1d102ec2f056d59ec69584385d;p=clang

Revert r361148 "[Syntax] Introduce TokenBuffer, start clangToolingSyntax library"

Also reverted r361264 "[Syntax] Rename TokensTest to SyntaxTests. NFC", which built on it.

This is because they were hitting an assert on the following bots:
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-ubuntu-fast
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-windows10pro-fast

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@361377 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/include/clang/Tooling/Syntax/Tokens.h b/include/clang/Tooling/Syntax/Tokens.h
deleted file mode 100644
index 0a0d47b9f3..0000000000
--- a/include/clang/Tooling/Syntax/Tokens.h
+++ /dev/null
@@ -1,302 +0,0 @@
-//===- Tokens.h - collect tokens from preprocessing --------------*- C++-*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// Record tokens that a preprocessor emits and define operations to map between
-// the tokens written in a file and tokens produced by the preprocessor.
-//
-// When running the compiler, there are two token streams we are interested in:
-//   - "spelled" tokens directly correspond to a substring written in some
-//     source file.
-//   - "expanded" tokens represent the result of preprocessing; the parser
-//     consumes this token stream to produce the AST.
-//
-// Expanded tokens correspond directly to locations found in the AST, making it
-// possible to find subranges of the token stream covered by various AST nodes.
-// Spelled tokens correspond directly to the source code written by the user.
-//
-// To allow composing these two use-cases, we also define operations that map
-// between expanded tokens and the spelled tokens that produced them (macro
-// calls, directives, etc).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H
-#define LLVM_CLANG_TOOLING_SYNTAX_TOKENS_H
-
-#include "clang/Basic/FileManager.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/TokenKinds.h"
-#include "clang/Lex/Token.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdint>
-#include <tuple>
-
-namespace clang {
-class Preprocessor;
-
-namespace syntax {
-
-/// A half-open character range inside a particular file; the start offset is
-/// included and the end offset is excluded from the range.
-struct FileRange {
-  /// EXPECTS: File.isValid() && Begin <= End.
-  FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset);
-  /// EXPECTS: BeginLoc.isValid() && BeginLoc.isFileID().
-  FileRange(const SourceManager &SM, SourceLocation BeginLoc, unsigned Length);
-  /// EXPECTS: BeginLoc.isValid() && BeginLoc.isFileID(), Begin <= End and files
-  /// are the same.
-  FileRange(const SourceManager &SM, SourceLocation BeginLoc,
-            SourceLocation EndLoc);
-
-  FileID file() const { return File; }
-  /// Start offset (inclusive) in the corresponding file.
-  unsigned beginOffset() const { return Begin; }
-  /// End offset (exclusive) in the corresponding file.
-  unsigned endOffset() const { return End; }
-
-  unsigned length() const { return End - Begin; }
-
-  /// Gets the substring that this FileRange refers to.
-  llvm::StringRef text(const SourceManager &SM) const;
-
-  friend bool operator==(const FileRange &L, const FileRange &R) {
-    return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End);
-  }
-  friend bool operator!=(const FileRange &L, const FileRange &R) {
-    return !(L == R);
-  }
-
-private:
-  FileID File;
-  unsigned Begin;
-  unsigned End;
-};
-
-/// For debugging purposes.
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const FileRange &R);
-
-/// A token coming directly from a file or from a macro invocation. Has just
-/// enough information to locate the token in the source code.
-/// Can represent both expanded and spelled tokens.
-class Token {
-public:
-  Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind)
-      : Location(Location), Length(Length), Kind(Kind) {}
-  /// EXPECTS: clang::Token is not an annotation token.
-  explicit Token(const clang::Token &T);
-
-  tok::TokenKind kind() const { return Kind; }
-  /// Location of the first character of the token.
-  SourceLocation location() const { return Location; }
-  /// Location right after the last character of the token.
-  SourceLocation endLocation() const {
-    return Location.getLocWithOffset(Length);
-  }
-  unsigned length() const { return Length; }
-
-  /// Get the substring covered by the token. Note that the result will include
-  /// all digraphs, newline continuations, etc. E.g. tokens for 'int' and
-  ///    in\
-  ///    t
-  /// both have the same kind tok::kw_int, but the results of text() differ.
-  llvm::StringRef text(const SourceManager &SM) const;
-
-  /// Gets the range of this token.
-  /// EXPECTS: the token comes from a file, not from a macro expansion.
-  FileRange range(const SourceManager &SM) const;
-
-  /// Given two tokens inside the same file, returns a file range that starts
-  /// at \p First and ends at \p Last.
-  /// EXPECTS: First and Last are file tokens from the same file, Last starts
-  /// after First.
-  static FileRange range(const SourceManager &SM, const syntax::Token &First,
-                         const syntax::Token &Last);
-
-  std::string dumpForTests(const SourceManager &SM) const;
-  /// For debugging purposes.
-  std::string str() const;
-
-private:
-  SourceLocation Location;
-  unsigned Length;
-  tok::TokenKind Kind;
-};
-/// For debugging purposes. Equivalent to a call to Token::str().
-llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T);
-
-/// A list of tokens obtained by preprocessing a text buffer and operations to
-/// map between the expanded and spelled tokens, i.e. TokenBuffer has
-/// information about two token streams:
-///    1. Expanded tokens: tokens produced by the preprocessor after all macro
-///       replacements,
-///    2. Spelled tokens: corresponding directly to the source code of a file
-///       before any macro replacements occurred.
-/// Here's an example to illustrate the difference between the two:
-///     #define FOO 10
-///     int a = FOO;
-///
-/// Spelled tokens are {'#','define','FOO','10','int','a','=','FOO',';'}.
-/// Expanded tokens are {'int','a','=','10',';','eof'}.
-///
-/// Note that the expanded token stream has a tok::eof token at the end; the
-/// spelled tokens never store an 'eof' token.
-///
-/// The full list of expanded tokens can be obtained with expandedTokens().
-/// Spelled tokens for each of the files can be obtained via
-/// spelledTokens(FileID).
-///
-/// To map between the expanded and spelled tokens use spelledForExpanded().
-///
-/// To build a token buffer use the TokenCollector class. You can also compute
-/// the spelled tokens of a file using the tokenize() helper.
-///
-/// FIXME: allow mapping from spelled to expanded tokens when a use-case shows
-/// up.
-class TokenBuffer {
-public:
-  TokenBuffer(const SourceManager &SourceMgr) : SourceMgr(&SourceMgr) {}
-  /// All tokens produced by the preprocessor after all macro replacements,
-  /// directives, etc. Source locations found in the clang AST will always
-  /// point to one of these tokens.
-  /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split
-  ///        into two '>' tokens by the parser. However, TokenBuffer currently
-  ///        keeps it as a single '>>' token.
-  llvm::ArrayRef<syntax::Token> expandedTokens() const {
-    return ExpandedTokens;
-  }
-
-  /// Find the subrange of spelled tokens that produced the corresponding \p
-  /// Expanded tokens.
-  ///
-  /// EXPECTS: \p Expanded is a subrange of expandedTokens().
-  ///
-  /// Will fail if the expanded tokens do not correspond to a
-  /// sequence of spelled tokens. E.g. for the following example:
-  ///
-  ///   #define FIRST f1 f2 f3
-  ///   #define SECOND s1 s2 s3
-  ///
-  ///   a FIRST b SECOND c // expanded tokens are: a f1 f2 f3 b s1 s2 s3 c
-  ///
-  /// the results would be:
-  ///     expanded   =>   spelled
-  ///     ------------------------
-  ///            a   =>   a
-  ///     s1 s2 s3   =>   SECOND
-  ///   a f1 f2 f3   =>   a FIRST
-  ///         a f1   =>   can't map
-  ///        s1 s2   =>   can't map
-  ///
-  /// If \p Expanded is empty, the returned value is llvm::None.
-  /// Complexity is logarithmic.
-  llvm::Optional<llvm::ArrayRef<syntax::Token>>
-  spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const;
-
-  /// Lexed tokens of a file before preprocessing. E.g. for the following input
-  ///     #define DECL(name) int name = 10
-  ///     DECL(a);
-  /// spelledTokens() returns
-  ///   {"#", "define", "DECL", "(", "name", ")", "int", "name", "=", "10",
-  ///    "DECL", "(", "a", ")", ";"}.
-  /// FIXME: we do not yet store tokens of directives, like #include, #define,
-  ///        #pragma, etc.
-  llvm::ArrayRef<syntax::Token> spelledTokens(FileID FID) const;
-
-  std::string dumpForTests() const;
-
-private:
-  /// Describes a mapping between a contiguous subrange of spelled tokens and
-  /// expanded tokens. Represents macro expansions, preprocessor directives,
-  /// conditionally disabled pp regions, etc.
-  ///   #define FOO 1+2
-  ///   #define BAR(a) a + 1
-  ///   FOO    // invocation #1, tokens = {'1','+','2'}, macroTokens = {'FOO'}.
-  ///   BAR(1) // invocation #2, tokens = {'a', '+', '1'},
-  ///             macroTokens = {'BAR', '(', '1', ')'}.
-  struct Mapping {
-    // Positions in the corresponding spelled token stream. The corresponding
-    // range is never empty.
-    unsigned BeginSpelled = 0;
-    unsigned EndSpelled = 0;
-    // Positions in the expanded token stream. The corresponding range can be
-    // empty.
-    unsigned BeginExpanded = 0;
-    unsigned EndExpanded = 0;
-
-    /// For debugging purposes.
-    std::string str() const;
-  };
-  /// Spelled tokens of the file with information about the subranges.
-  struct MarkedFile {
-    /// Lexed, but not preprocessed, tokens of the file. These map directly to
-    /// text in the corresponding files and include tokens of all preprocessor
-    /// directives.
-    /// FIXME: spelled tokens don't change across FileIDs that map to the same
-    ///        FileEntry. We could consider deduplicating them to save memory.
-    std::vector<syntax::Token> SpelledTokens;
-    /// A sorted list to convert between the spelled and expanded token streams.
-    std::vector<Mapping> Mappings;
-    /// The first expanded token produced for this FileID.
-    unsigned BeginExpanded = 0;
-    unsigned EndExpanded = 0;
-  };
-
-  friend class TokenCollector;
-
-  /// Maps a single expanded token to its spelled counterpart or a mapping that
-  /// produced it.
-  std::pair<const syntax::Token *, const Mapping *>
-  spelledForExpandedToken(const syntax::Token *Expanded) const;
-
-  /// Token stream produced after preprocessing; conceptually this captures the
-  /// same stream as 'clang -E' (excluding the preprocessor directives like
-  /// #file, etc.).
-  std::vector<syntax::Token> ExpandedTokens;
-  llvm::DenseMap<FileID, MarkedFile> Files;
-  // The value is never null, pointer instead of reference to avoid disabling
-  // implicit assignment operator.
-  const SourceManager *SourceMgr;
-};
-
-/// Lex the text buffer, corresponding to \p FID, in raw mode and record the
-/// resulting spelled tokens. Does minimal post-processing on raw identifiers,
-/// setting the appropriate token kind (instead of the raw_identifier reported
-/// by the lexer in raw mode). This is a very low-level function; most users
-/// should prefer to use TokenCollector. Lexing in raw mode produces wildly
-/// different results from what one might expect when running a C++ frontend,
-/// e.g. the preprocessor does not run at all.
-/// The result will *not* have an 'eof' token at the end.
-std::vector<syntax::Token> tokenize(FileID FID, const SourceManager &SM,
-                                    const LangOptions &LO);
-
-/// Collects tokens for the main file while running the frontend action. An
-/// instance of this object should be created on
-/// FrontendAction::BeginSourceFile() and the results should be consumed after
-/// FrontendAction::Execute() finishes.
-class TokenCollector {
-public:
-  /// Adds the hooks to collect the tokens. Should be called before the
-  /// preprocessing starts, i.e. as a part of BeginSourceFile() or
-  /// CreateASTConsumer().
-  TokenCollector(Preprocessor &P);
-
-  /// Finalizes token collection. Should be called after preprocessing is
-  /// finished, i.e. after running Execute().
-  LLVM_NODISCARD TokenBuffer consume() &&;
-
-private:
-  class Builder;
-  std::vector<syntax::Token> Expanded;
-  Preprocessor &PP;
-};
-
-} // namespace syntax
-} // namespace clang
-
-#endif
diff --git a/lib/Tooling/CMakeLists.txt b/lib/Tooling/CMakeLists.txt
index 6ed2f25acd..4b671e299a 100644
--- a/lib/Tooling/CMakeLists.txt
+++ b/lib/Tooling/CMakeLists.txt
@@ -7,7 +7,6 @@ add_subdirectory(Core)
 add_subdirectory(Inclusions)
 add_subdirectory(Refactoring)
 add_subdirectory(ASTDiff)
-add_subdirectory(Syntax)
 
 add_clang_library(clangTooling
   AllTUsExecution.cpp
diff --git a/lib/Tooling/Syntax/CMakeLists.txt b/lib/Tooling/Syntax/CMakeLists.txt
deleted file mode 100644
index e4c2660057..0000000000
--- a/lib/Tooling/Syntax/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-set(LLVM_LINK_COMPONENTS Support)
-
-add_clang_library(clangToolingSyntax
-  Tokens.cpp
-
-  LINK_LIBS
-  clangBasic
-  clangFrontend
-  clangLex
-  )
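The doc comments in the header above fully describe the intended TokenCollector lifecycle: install the hooks before preprocessing starts, consume the buffer after Execute() finishes. A minimal sketch of a conforming client follows; it mirrors the RecordTokens action used by the unit tests later in this patch, but the class name is illustrative and not part of the reverted code:

    #include "clang/AST/ASTConsumer.h"
    #include "clang/Frontend/CompilerInstance.h"
    #include "clang/Frontend/FrontendAction.h"
    #include "clang/Tooling/Syntax/Tokens.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/STLExtras.h"
    #include <memory>

    // Hypothetical action that records the main file's token streams.
    class CollectTokensAction : public clang::ASTFrontendAction {
    public:
      bool BeginSourceFileAction(clang::CompilerInstance &CI) override {
        // Install the preprocessor hooks before preprocessing starts.
        Collector.emplace(CI.getPreprocessor());
        return true;
      }
      void EndSourceFileAction() override {
        // Preprocessing is finished; finalize into a TokenBuffer.
        Tokens = std::move(*Collector).consume();
      }
      std::unique_ptr<clang::ASTConsumer>
      CreateASTConsumer(clang::CompilerInstance &, llvm::StringRef) override {
        return llvm::make_unique<clang::ASTConsumer>();
      }

    private:
      llvm::Optional<clang::syntax::TokenCollector> Collector;
      llvm::Optional<clang::syntax::TokenBuffer> Tokens;
    };

Both members are llvm::Optional because TokenCollector must be constructed only once the Preprocessor exists, and TokenBuffer has no default state until consume() runs.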
diff --git a/lib/Tooling/Syntax/Tokens.cpp b/lib/Tooling/Syntax/Tokens.cpp
deleted file mode 100644
index f291f18c48..0000000000
--- a/lib/Tooling/Syntax/Tokens.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "clang/Tooling/Syntax/Tokens.h"
-
-#include "clang/Basic/Diagnostic.h"
-#include "clang/Basic/IdentifierTable.h"
-#include "clang/Basic/LLVM.h"
-#include "clang/Basic/LangOptions.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Basic/TokenKinds.h"
-#include "clang/Lex/Preprocessor.h"
-#include "clang/Lex/Token.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <iterator>
-#include <string>
-#include <utility>
-#include <vector>
-
-using namespace clang;
-using namespace clang::syntax;
-
-syntax::Token::Token(const clang::Token &T)
-    : Token(T.getLocation(), T.getLength(), T.getKind()) {
-  assert(!T.isAnnotation());
-}
-
-llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
-  bool Invalid = false;
-  const char *Start = SM.getCharacterData(location(), &Invalid);
-  assert(!Invalid);
-  return llvm::StringRef(Start, length());
-}
-
-FileRange syntax::Token::range(const SourceManager &SM) const {
-  assert(location().isFileID() && "must be a spelled token");
-  FileID File;
-  unsigned StartOffset;
-  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
-  return FileRange(File, StartOffset, StartOffset + length());
-}
-
-FileRange syntax::Token::range(const SourceManager &SM,
-                               const syntax::Token &First,
-                               const syntax::Token &Last) {
-  auto F = First.range(SM);
-  auto L = Last.range(SM);
-  assert(F.file() == L.file() && "tokens from different files");
-  assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens");
-  return FileRange(F.file(), F.beginOffset(), L.endOffset());
-}
-
-llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
-  return OS << T.str();
-}
-
-FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
-    : File(File), Begin(BeginOffset), End(EndOffset) {
-  assert(File.isValid());
-  assert(BeginOffset <= EndOffset);
-}
-
-FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
-                     unsigned Length) {
-  assert(BeginLoc.isValid());
-  assert(BeginLoc.isFileID());
-
-  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
-  End = Begin + Length;
-}
-FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
-                     SourceLocation EndLoc) {
-  assert(BeginLoc.isValid());
-  assert(BeginLoc.isFileID());
-  assert(EndLoc.isValid());
-  assert(EndLoc.isFileID());
-  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
-  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
-
-  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
-  End = SM.getFileOffset(EndLoc);
-}
-
-llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
-                                      const FileRange &R) {
-  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
-                             R.file().getHashValue(), R.beginOffset(),
-                             R.endOffset());
-}
-
-llvm::StringRef FileRange::text(const SourceManager &SM) const {
-  bool Invalid = false;
-  StringRef Text = SM.getBufferData(File, &Invalid);
-  if (Invalid)
-    return "";
-  assert(Begin <= Text.size());
-  assert(End <= Text.size());
-  return Text.substr(Begin, length());
-}
-
-std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
-TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
-  assert(Expanded);
-  assert(ExpandedTokens.data() <= Expanded &&
-         Expanded < ExpandedTokens.data() + ExpandedTokens.size());
-
-  auto FileIt = Files.find(
-      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
-  assert(FileIt != Files.end() && "no file for an expanded token");
-
-  const MarkedFile &File = FileIt->second;
-
-  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
-  // Find the first mapping that produced tokens after \p Expanded.
-  auto It = llvm::bsearch(File.Mappings, [&](const Mapping &M) {
-    return ExpandedIndex < M.BeginExpanded;
-  });
-  // Our token could only be produced by the previous mapping.
-  if (It == File.Mappings.begin()) {
-    // No previous mapping, no need to modify offsets.
-    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded], nullptr};
-  }
-  --It; // 'It' now points to the last mapping that started before our token.
-
-  // Check if the token is part of the mapping.
-  if (ExpandedIndex < It->EndExpanded)
-    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping*/ &*It};
-
-  // Not part of the mapping; use the index from the previous mapping to
-  // compute the corresponding spelled token.
-  return {
-      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
-      /*Mapping*/ nullptr};
-}
-
-llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
-  auto It = Files.find(FID);
-  assert(It != Files.end());
-  return It->second.SpelledTokens;
-}
-
-std::string TokenBuffer::Mapping::str() const {
-  return llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
-                       BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
-}
-
-llvm::Optional<llvm::ArrayRef<syntax::Token>>
-TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
-  // Mapping an empty range is ambiguous in case of empty mappings at either
-  // end of the range; bail out in that case.
-  if (Expanded.empty())
-    return llvm::None;
-
-  // FIXME: also allow changes uniquely mapping to macro arguments.
-
-  const syntax::Token *BeginSpelled;
-  const Mapping *BeginMapping;
-  std::tie(BeginSpelled, BeginMapping) =
-      spelledForExpandedToken(&Expanded.front());
-
-  const syntax::Token *LastSpelled;
-  const Mapping *LastMapping;
-  std::tie(LastSpelled, LastMapping) =
-      spelledForExpandedToken(&Expanded.back());
-
-  FileID FID = SourceMgr->getFileID(BeginSpelled->location());
-  // FIXME: Handle multi-file changes by trying to map onto a common root.
-  if (FID != SourceMgr->getFileID(LastSpelled->location()))
-    return llvm::None;
-
-  const MarkedFile &File = Files.find(FID)->second;
-
-  // Do not allow changes that cross macro expansion boundaries.
-  unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
-  unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
-  if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
-    return llvm::None;
-  if (LastMapping && EndExpanded < LastMapping->EndExpanded)
-    return llvm::None;
-  // All is good, return the result.
-  return llvm::makeArrayRef(
-      BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
-                   : BeginSpelled,
-      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
-                  : LastSpelled + 1);
-}
-
-std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
-                                            const LangOptions &LO) {
-  std::vector<syntax::Token> Tokens;
-  IdentifierTable Identifiers(LO);
-  auto AddToken = [&](clang::Token T) {
-    // Fill the proper token kind for keywords, etc.
-    if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
-        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
-      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
-      T.setIdentifierInfo(&II);
-      T.setKind(II.getTokenID());
-    }
-    Tokens.push_back(syntax::Token(T));
-  };
-
-  Lexer L(FID, SM.getBuffer(FID), SM, LO);
-
-  clang::Token T;
-  while (!L.LexFromRawLexer(T))
-    AddToken(T);
-  // 'eof' is only the last token if the input is null-terminated. Never store
-  // it, for consistency.
-  if (T.getKind() != tok::eof)
-    AddToken(T);
-  return Tokens;
-}
-
-/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
-/// implementation tracks the tokens, macro expansions and directives coming
-/// from the preprocessor and:
-/// - for each token, figures out if it is part of the expanded token stream,
-///   the spelled token stream or both. Stores the tokens appropriately.
-/// - records mappings from the spelled to expanded token ranges, e.g. for
-///   macro expansions.
-/// FIXME: also properly record:
-///          - #include directives,
-///          - #pragma, #line and other PP directives,
-///          - skipped pp regions,
-///          - ...
-
-TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
-  // Collect the expanded token stream during preprocessing.
-  PP.setTokenWatcher([this](const clang::Token &T) {
-    if (T.isAnnotation())
-      return;
-    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
-                                          << "Token: "
-                                          << syntax::Token(T).dumpForTests(
-                                                 this->PP.getSourceManager())
-                                          << "\n"
-
-    );
-    Expanded.push_back(syntax::Token(T));
-  });
-}
-
-/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
-/// token stream.
-class TokenCollector::Builder {
-public:
-  Builder(std::vector<syntax::Token> Expanded, const SourceManager &SM,
-          const LangOptions &LangOpts)
-      : Result(SM), SM(SM), LangOpts(LangOpts) {
-    Result.ExpandedTokens = std::move(Expanded);
-  }
-
-  TokenBuffer build() && {
-    buildSpelledTokens();
-
-    // Walk over expanded tokens and spelled tokens in parallel, building the
-    // mappings between those using source locations.
-
-    // The 'eof' token is special; it is not part of the spelled token stream.
-    // We handle it separately at the end.
-    assert(!Result.ExpandedTokens.empty());
-    assert(Result.ExpandedTokens.back().kind() == tok::eof);
-    for (unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
-      // (!) I might be updated by the following call.
-      processExpandedToken(I);
-    }
-
-    // 'eof' is not handled in the loop, do it here.
-    assert(SM.getMainFileID() ==
-           SM.getFileID(Result.ExpandedTokens.back().location()));
-    fillGapUntil(Result.Files[SM.getMainFileID()],
-                 Result.ExpandedTokens.back().location(),
-                 Result.ExpandedTokens.size() - 1);
-    Result.Files[SM.getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
-
-    // Some files might have unaccounted spelled tokens at the end; add an
-    // empty mapping for those as they did not have expanded counterparts.
-    fillGapsAtEndOfFiles();
-
-    return std::move(Result);
-  }
-
-private:
-  /// Processes the next token in the expanded stream and advances past the
-  /// corresponding spelled tokens, recording a mapping if needed.
-  /// (!) \p I will be updated if this had to skip tokens, e.g. for macros.
-  void processExpandedToken(unsigned &I) {
-    auto L = Result.ExpandedTokens[I].location();
-    if (L.isMacroID()) {
-      processMacroExpansion(SM.getExpansionRange(L), I);
-      return;
-    }
-    if (L.isFileID()) {
-      auto FID = SM.getFileID(L);
-      TokenBuffer::MarkedFile &File = Result.Files[FID];
-
-      fillGapUntil(File, L, I);
-
-      // Skip the token.
-      assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
-             "no corresponding token in the spelled stream");
-      ++NextSpelled[FID];
-      return;
-    }
-  }
-
-  /// Skips expanded and spelled tokens of a macro expansion that covers \p
-  /// SpelledRange. Adds a corresponding mapping.
-  /// (!) \p I will be the index of the last token in the expansion after this
-  /// function returns.
-  void processMacroExpansion(CharSourceRange SpelledRange, unsigned &I) {
-    auto FID = SM.getFileID(SpelledRange.getBegin());
-    assert(FID == SM.getFileID(SpelledRange.getEnd()));
-    TokenBuffer::MarkedFile &File = Result.Files[FID];
-
-    fillGapUntil(File, SpelledRange.getBegin(), I);
-
-    TokenBuffer::Mapping M;
-    // Skip the spelled macro tokens.
-    std::tie(M.BeginSpelled, M.EndSpelled) =
-        consumeSpelledUntil(File, SpelledRange.getEnd().getLocWithOffset(1));
-    // Skip all expanded tokens from the same macro expansion.
-    M.BeginExpanded = I;
-    for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
-      auto NextL = Result.ExpandedTokens[I + 1].location();
-      if (!NextL.isMacroID() ||
-          SM.getExpansionLoc(NextL) != SpelledRange.getBegin())
-        break;
-    }
-    M.EndExpanded = I + 1;
-
-    // Add the resulting mapping.
-    File.Mappings.push_back(M);
-  }
-
-  /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
-  /// ranges for each of the files.
-  void buildSpelledTokens() {
-    for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
-      auto FID =
-          SM.getFileID(SM.getExpansionLoc(Result.ExpandedTokens[I].location()));
-      auto It = Result.Files.try_emplace(FID);
-      TokenBuffer::MarkedFile &File = It.first->second;
-
-      File.EndExpanded = I + 1;
-      if (!It.second)
-        continue; // we have seen this file before.
-
-      // This is the first time we see this file.
-      File.BeginExpanded = I;
-      File.SpelledTokens = tokenize(FID, SM, LangOpts);
-    }
-  }
-
-  /// Consumes spelled tokens until location \p L is reached (the token
-  /// starting at \p L is not included). Returns the indices of the consumed
-  /// range.
-  std::pair<unsigned, unsigned>
-  consumeSpelledUntil(TokenBuffer::MarkedFile &File, SourceLocation L) {
-    assert(L.isFileID());
-    FileID FID;
-    unsigned Offset;
-    std::tie(FID, Offset) = SM.getDecomposedLoc(L);
-
-    // (!) we update the index in-place.
-    unsigned &SpelledI = NextSpelled[FID];
-    unsigned Before = SpelledI;
-    for (; SpelledI < File.SpelledTokens.size() &&
-           SM.getFileOffset(File.SpelledTokens[SpelledI].location()) < Offset;
-         ++SpelledI) {
-    }
-    return std::make_pair(Before, /*After*/ SpelledI);
-  };
-
-  /// Consumes spelled tokens until location \p L is reached and adds a mapping
-  /// covering the consumed tokens. The mapping will point to an empty expanded
-  /// range at position \p ExpandedIndex.
-  void fillGapUntil(TokenBuffer::MarkedFile &File, SourceLocation L,
-                    unsigned ExpandedIndex) {
-    unsigned BeginSpelledGap, EndSpelledGap;
-    std::tie(BeginSpelledGap, EndSpelledGap) = consumeSpelledUntil(File, L);
-    if (BeginSpelledGap == EndSpelledGap)
-      return; // No gap.
-    TokenBuffer::Mapping M;
-    M.BeginSpelled = BeginSpelledGap;
-    M.EndSpelled = EndSpelledGap;
-    M.BeginExpanded = M.EndExpanded = ExpandedIndex;
-    File.Mappings.push_back(M);
-  };
-
-  /// Adds empty mappings for unconsumed spelled tokens at the end of each
-  /// file.
-  void fillGapsAtEndOfFiles() {
-    for (auto &F : Result.Files) {
-      unsigned Next = NextSpelled[F.first];
-      if (F.second.SpelledTokens.size() == Next)
-        continue; // All spelled tokens are accounted for.
-
-      // Record a mapping for the gap at the end of the spelled tokens.
-      TokenBuffer::Mapping M;
-      M.BeginSpelled = Next;
-      M.EndSpelled = F.second.SpelledTokens.size();
-      M.BeginExpanded = F.second.EndExpanded;
-      M.EndExpanded = F.second.EndExpanded;
-
-      F.second.Mappings.push_back(M);
-    }
-  }
-
-  TokenBuffer Result;
-  /// For each file, the position of the next spelled token we will consume.
-  llvm::DenseMap<FileID, unsigned> NextSpelled;
-  const SourceManager &SM;
-  const LangOptions &LangOpts;
-};
-
-TokenBuffer TokenCollector::consume() && {
-  PP.setTokenWatcher(nullptr);
-  return Builder(std::move(Expanded), PP.getSourceManager(), PP.getLangOpts())
-      .build();
-}
-
-std::string syntax::Token::str() const {
-  return llvm::formatv("Token({0}, length = {1})", tok::getTokenName(kind()),
-                       length());
-}
-
-std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
-  return llvm::formatv("{0} {1}", tok::getTokenName(kind()), text(SM));
-}
-
-std::string TokenBuffer::dumpForTests() const {
-  auto PrintToken = [this](const syntax::Token &T) -> std::string {
-    if (T.kind() == tok::eof)
-      return "<eof>";
-    return T.text(*SourceMgr);
-  };
-
-  auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
-                                        llvm::ArrayRef<syntax::Token> Tokens) {
-    if (Tokens.size() == 1) {
-      assert(Tokens[0].kind() == tok::eof);
-      OS << "<eof>";
-      return;
-    }
-    OS << Tokens[0].text(*SourceMgr);
-    for (unsigned I = 1; I < Tokens.size(); ++I) {
-      if (Tokens[I].kind() == tok::eof)
-        continue;
-      OS << " " << PrintToken(Tokens[I]);
-    }
-  };
-
-  std::string Dump;
-  llvm::raw_string_ostream OS(Dump);
-
-  OS << "expanded tokens:\n"
-     << "  ";
-  DumpTokens(OS, ExpandedTokens);
-  OS << "\n";
-
-  std::vector<FileID> Keys;
-  for (auto F : Files)
-    Keys.push_back(F.first);
-  llvm::sort(Keys);
-
-  for (FileID ID : Keys) {
-    const MarkedFile &File = Files.find(ID)->second;
-    auto *Entry = SourceMgr->getFileEntryForID(ID);
-    if (!Entry)
-      continue; // Skip builtin files.
-    OS << llvm::formatv("file '{0}'\n", Entry->getName())
-       << "  spelled tokens:\n"
-       << "    ";
-    DumpTokens(OS, File.SpelledTokens);
-    OS << "\n";
-
-    if (File.Mappings.empty()) {
-      OS << "  no mappings.\n";
-      continue;
-    }
-    OS << "  mappings:\n";
-    for (auto &M : File.Mappings) {
-      OS << llvm::formatv(
-          "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
-          PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
-          M.EndSpelled == File.SpelledTokens.size()
-              ? "<eof>"
-              : PrintToken(File.SpelledTokens[M.EndSpelled]),
-          M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
-          M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
-          M.EndExpanded);
-    }
-  }
-  return OS.str();
-}
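The mapping machinery above exists to answer one query: which spelled tokens produced a given range of expanded tokens. A small sketch of the documented call pattern follows; the function is illustrative (not part of this patch) and uses only the reverted API as specified in Tokens.h:

    #include "clang/Tooling/Syntax/Tokens.h"
    #include "llvm/Support/raw_ostream.h"

    // Prints the source text that produced the (non-eof) expanded stream.
    // Does nothing if the range cannot be mapped, e.g. when it partially
    // covers a macro expansion or spans multiple files.
    void printSpelledOrigin(const clang::syntax::TokenBuffer &Buffer,
                            const clang::SourceManager &SM) {
      llvm::ArrayRef<clang::syntax::Token> Expanded = Buffer.expandedTokens();
      if (Expanded.empty())
        return;
      Expanded = Expanded.drop_back(); // drop 'eof'; spelled streams lack it
      if (llvm::Optional<llvm::ArrayRef<clang::syntax::Token>> Spelled =
              Buffer.spelledForExpanded(Expanded)) {
        for (const clang::syntax::Token &T : *Spelled)
          llvm::errs() << T.text(SM) << " ";
        llvm::errs() << "\n";
      }
    }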
"" - : PrintToken(File.SpelledTokens[M.EndSpelled]), - M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]), - M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]), - M.EndExpanded); - } - } - return OS.str(); -} diff --git a/unittests/Tooling/CMakeLists.txt b/unittests/Tooling/CMakeLists.txt index af8a35d925..844288da62 100644 --- a/unittests/Tooling/CMakeLists.txt +++ b/unittests/Tooling/CMakeLists.txt @@ -72,6 +72,3 @@ target_link_libraries(ToolingTests clangToolingInclusions clangToolingRefactor ) - - -add_subdirectory(Syntax) diff --git a/unittests/Tooling/Syntax/CMakeLists.txt b/unittests/Tooling/Syntax/CMakeLists.txt deleted file mode 100644 index 4150a9ff1e..0000000000 --- a/unittests/Tooling/Syntax/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -set(LLVM_LINK_COMPONENTS - ${LLVM_TARGETS_TO_BUILD} - Support - ) - -add_clang_unittest(SyntaxTests - TokensTest.cpp -) - -target_link_libraries(SyntaxTests - PRIVATE - clangAST - clangBasic - clangFrontend - clangLex - clangSerialization - clangTooling - clangToolingSyntax - LLVMTestingSupport - ) diff --git a/unittests/Tooling/Syntax/TokensTest.cpp b/unittests/Tooling/Syntax/TokensTest.cpp deleted file mode 100644 index ef3d8f3689..0000000000 --- a/unittests/Tooling/Syntax/TokensTest.cpp +++ /dev/null @@ -1,654 +0,0 @@ -//===- TokensTest.cpp -----------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clang/Tooling/Syntax/Tokens.h" -#include "clang/AST/ASTConsumer.h" -#include "clang/AST/Expr.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticIDs.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/FileSystemOptions.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Basic/TokenKinds.def" -#include "clang/Basic/TokenKinds.h" -#include "clang/Frontend/CompilerInstance.h" -#include "clang/Frontend/FrontendAction.h" -#include "clang/Frontend/Utils.h" -#include "clang/Lex/Lexer.h" -#include "clang/Lex/PreprocessorOptions.h" -#include "clang/Lex/Token.h" -#include "clang/Tooling/Tooling.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/IntrusiveRefCntPtr.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Support/raw_os_ostream.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Testing/Support/Annotations.h" -#include "llvm/Testing/Support/SupportHelpers.h" -#include -#include -#include -#include -#include -#include -#include - -using namespace clang; -using namespace clang::syntax; - -using llvm::ValueIs; -using ::testing::AllOf; -using ::testing::Contains; -using ::testing::ElementsAre; -using ::testing::Matcher; -using ::testing::Not; -using ::testing::StartsWith; - -namespace { -// Checks the passed ArrayRef has the same begin() and end() iterators as the -// argument. -MATCHER_P(SameRange, A, "") { - return A.begin() == arg.begin() && A.end() == arg.end(); -} -// Matchers for syntax::Token. 
-MATCHER_P(Kind, K, "") { return arg.kind() == K; }
-MATCHER_P2(HasText, Text, SourceMgr, "") {
-  return arg.text(*SourceMgr) == Text;
-}
-/// Checks the start and end location of a token are equal to SourceRng.
-MATCHER_P(RangeIs, SourceRng, "") {
-  return arg.location() == SourceRng.first &&
-         arg.endLocation() == SourceRng.second;
-}
-
-class TokenCollectorTest : public ::testing::Test {
-public:
-  /// Run the clang frontend, collect the preprocessed tokens from the frontend
-  /// invocation and store them in this->Buffer.
-  /// This also clears SourceManager before running the compiler.
-  void recordTokens(llvm::StringRef Code) {
-    class RecordTokens : public ASTFrontendAction {
-    public:
-      explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
-
-      bool BeginSourceFileAction(CompilerInstance &CI) override {
-        assert(!Collector && "expected only a single call to BeginSourceFile");
-        Collector.emplace(CI.getPreprocessor());
-        return true;
-      }
-      void EndSourceFileAction() override {
-        assert(Collector && "BeginSourceFileAction was never called");
-        Result = std::move(*Collector).consume();
-      }
-
-      std::unique_ptr<ASTConsumer>
-      CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
-        return llvm::make_unique<ASTConsumer>();
-      }
-
-    private:
-      TokenBuffer &Result;
-      llvm::Optional<TokenCollector> Collector;
-    };
-
-    constexpr const char *FileName = "./input.cpp";
-    FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
-    // Prepare to run a compiler.
-    std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
-                                      FileName};
-    auto CI = createInvocationFromCommandLine(Args, Diags, FS);
-    assert(CI);
-    CI->getFrontendOpts().DisableFree = false;
-    CI->getPreprocessorOpts().addRemappedFile(
-        FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
-    CompilerInstance Compiler;
-    Compiler.setInvocation(std::move(CI));
-    if (!Diags->getClient())
-      Diags->setClient(new IgnoringDiagConsumer);
-    Compiler.setDiagnostics(Diags.get());
-    Compiler.setFileManager(FileMgr.get());
-    Compiler.setSourceManager(SourceMgr.get());
-
-    this->Buffer = TokenBuffer(*SourceMgr);
-    RecordTokens Recorder(this->Buffer);
-    ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
-        << "failed to run the frontend";
-  }
-
-  /// Record the tokens and return a test dump of the resulting buffer.
-  std::string collectAndDump(llvm::StringRef Code) {
-    recordTokens(Code);
-    return Buffer.dumpForTests();
-  }
-
-  // Adds a file to the test VFS.
-  void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
-    if (!FS->addFile(Path, time_t(),
-                     llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
-      ADD_FAILURE() << "could not add a file to VFS: " << Path;
-    }
-  }
-
-  /// Add a new file, run syntax::tokenize() on it and return the results.
-  std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
-    // FIXME: pass proper LangOptions.
-    return syntax::tokenize(
-        SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
-        *SourceMgr, LangOptions());
-  }
-
-  // Specialized versions of matchers that hide the SourceManager from clients.
-  Matcher<syntax::Token> HasText(std::string Text) const {
-    return ::HasText(Text, SourceMgr.get());
-  }
-  Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
-    std::pair<SourceLocation, SourceLocation> Ls;
-    Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
-                   .getLocWithOffset(R.Begin);
-    Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
-                    .getLocWithOffset(R.End);
-    return ::RangeIs(Ls);
-  }
-
-  /// Finds a subrange in O(n * m).
-  template <class T, class U, class Eq>
-  llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
-                                 llvm::ArrayRef<T> Range, Eq F) {
-    for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
-      auto It = Begin;
-      // Bail out if Range ends before the whole Subrange is matched.
-      for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
-           ++ItSub, ++It) {
-        if (It == Range.end() || !F(*ItSub, *It))
-          goto continue_outer;
-      }
-      return llvm::makeArrayRef(Begin, It);
-    continue_outer:;
-    }
-    return llvm::makeArrayRef(Range.end(), Range.end());
-  }
-
-  /// Finds a subrange in \p Tokens that matches the tokens specified in \p
-  /// Query. The match should be unique. \p Query is a whitespace-separated
-  /// list of tokens to search for.
-  llvm::ArrayRef<syntax::Token>
-  findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
-    llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
-    Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
-    if (QueryTokens.empty()) {
-      ADD_FAILURE() << "will not look for an empty list of tokens";
-      std::abort();
-    }
-    // An equality test for search.
-    auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
-      return Q == T.text(*SourceMgr);
-    };
-    // Find a match.
-    auto Found =
-        findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
-    if (Found.begin() == Tokens.end()) {
-      ADD_FAILURE() << "could not find the subrange for " << Query;
-      std::abort();
-    }
-    // Check that the match is unique.
-    if (findSubrange(llvm::makeArrayRef(QueryTokens),
-                     llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
-            .begin() != Tokens.end()) {
-      ADD_FAILURE() << "match is not unique for " << Query;
-      std::abort();
-    }
-    return Found;
-  };
-
-  // Specialized versions of findTokenRange for expanded and spelled tokens.
-  llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
-    return findTokenRange(Query, Buffer.expandedTokens());
-  }
-  llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
-                                            FileID File = FileID()) {
-    if (!File.isValid())
-      File = SourceMgr->getMainFileID();
-    return findTokenRange(Query, Buffer.spelledTokens(File));
-  }
-
-  // Data fields.
-  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
-      new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
-  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
-      new llvm::vfs::InMemoryFileSystem;
-  llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
-      new FileManager(FileSystemOptions(), FS);
-  llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
-      new SourceManager(*Diags, *FileMgr);
-  /// Contains the last result of calling recordTokens().
-  TokenBuffer Buffer = TokenBuffer(*SourceMgr);
-};
-
-TEST_F(TokenCollectorTest, RawMode) {
-  EXPECT_THAT(tokenize("int main() {}"),
-              ElementsAre(Kind(tok::kw_int),
-                          AllOf(HasText("main"), Kind(tok::identifier)),
-                          Kind(tok::l_paren), Kind(tok::r_paren),
-                          Kind(tok::l_brace), Kind(tok::r_brace)));
-  // Comments are ignored for now.
-  EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
-              ElementsAre(Kind(tok::kw_int),
-                          AllOf(HasText("a"), Kind(tok::identifier)),
-                          Kind(tok::semi)));
-}
-
-TEST_F(TokenCollectorTest, Basic) {
-  std::pair<std::string, std::string> TestCases[] = {
-      {"int main() {}",
-       R"(expanded tokens:
-  int main ( ) { }
-file './input.cpp'
-  spelled tokens:
-    int main ( ) { }
-  no mappings.
-)"},
-      // All kinds of whitespace are ignored.
-      {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
-       R"(expanded tokens:
-  int main ( ) { }
-file './input.cpp'
-  spelled tokens:
-    int main ( ) { }
-  no mappings.
-)"},
-      // Annotation tokens are ignored.
- {R"cpp( - #pragma GCC visibility push (public) - #pragma GCC visibility pop - )cpp", - R"(expanded tokens: - -file './input.cpp' - spelled tokens: - # pragma GCC visibility push ( public ) # pragma GCC visibility pop - mappings: - ['#'_0, ''_13) => [''_0, ''_0) -)"}}; - for (auto &Test : TestCases) - EXPECT_EQ(collectAndDump(Test.first), Test.second) - << collectAndDump(Test.first); -} - -TEST_F(TokenCollectorTest, Locations) { - // Check locations of the tokens. - llvm::Annotations Code(R"cpp( - $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]] - )cpp"); - recordTokens(Code.code()); - // Check expanded tokens. - EXPECT_THAT( - Buffer.expandedTokens(), - ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), - AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), - AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), - AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), - AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))), - Kind(tok::eof))); - // Check spelled tokens. - EXPECT_THAT( - Buffer.spelledTokens(SourceMgr->getMainFileID()), - ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))), - AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))), - AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))), - AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))), - AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))))); -} - -TEST_F(TokenCollectorTest, MacroDirectives) { - // Macro directives are not stored anywhere at the moment. - std::string Code = R"cpp( - #define FOO a - #include "unresolved_file.h" - #undef FOO - #ifdef X - #else - #endif - #ifndef Y - #endif - #if 1 - #elif 2 - #else - #endif - #pragma once - #pragma something lalala - - int a; - )cpp"; - std::string Expected = - "expanded tokens:\n" - " int a ;\n" - "file './input.cpp'\n" - " spelled tokens:\n" - " # define FOO a # include \"unresolved_file.h\" # undef FOO " - "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else " - "# endif # pragma once # pragma something lalala int a ;\n" - " mappings:\n" - " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n"; - EXPECT_EQ(collectAndDump(Code), Expected); -} - -TEST_F(TokenCollectorTest, MacroReplacements) { - std::pair TestCases[] = { - // A simple object-like macro. - {R"cpp( - #define INT int const - INT a; - )cpp", - R"(expanded tokens: - int const a ; -file './input.cpp' - spelled tokens: - # define INT int const INT a ; - mappings: - ['#'_0, 'INT'_5) => ['int'_0, 'int'_0) - ['INT'_5, 'a'_6) => ['int'_0, 'a'_2) -)"}, - // A simple function-like macro. - {R"cpp( - #define INT(a) const int - INT(10+10) a; - )cpp", - R"(expanded tokens: - const int a ; -file './input.cpp' - spelled tokens: - # define INT ( a ) const int INT ( 10 + 10 ) a ; - mappings: - ['#'_0, 'INT'_8) => ['const'_0, 'const'_0) - ['INT'_8, 'a'_14) => ['const'_0, 'a'_2) -)"}, - // Recursive macro replacements. - {R"cpp( - #define ID(X) X - #define INT int const - ID(ID(INT)) a; - )cpp", - R"(expanded tokens: - int const a ; -file './input.cpp' - spelled tokens: - # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ; - mappings: - ['#'_0, 'ID'_12) => ['int'_0, 'int'_0) - ['ID'_12, 'a'_19) => ['int'_0, 'a'_2) -)"}, - // A little more complicated recursive macro replacements. 
- {R"cpp( - #define ADD(X, Y) X+Y - #define MULT(X, Y) X*Y - - int a = ADD(MULT(1,2), MULT(3,ADD(4,5))); - )cpp", - "expanded tokens:\n" - " int a = 1 * 2 + 3 * 4 + 5 ;\n" - "file './input.cpp'\n" - " spelled tokens:\n" - " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int " - "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n" - " mappings:\n" - " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n" - " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"}, - // Empty macro replacement. - {R"cpp( - #define EMPTY - #define EMPTY_FUNC(X) - EMPTY - EMPTY_FUNC(1+2+3) - )cpp", - R"(expanded tokens: - -file './input.cpp' - spelled tokens: - # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 ) - mappings: - ['#'_0, ''_18) => [''_0, ''_0) -)"}, - // File ends with a macro replacement. - {R"cpp( - #define FOO 10+10; - int a = FOO - )cpp", - R"(expanded tokens: - int a = 10 + 10 ; -file './input.cpp' - spelled tokens: - # define FOO 10 + 10 ; int a = FOO - mappings: - ['#'_0, 'int'_7) => ['int'_0, 'int'_0) - ['FOO'_10, ''_11) => ['10'_3, ''_7) -)"}}; - - for (auto &Test : TestCases) - EXPECT_EQ(Test.second, collectAndDump(Test.first)) - << collectAndDump(Test.first); -} - -TEST_F(TokenCollectorTest, SpecialTokens) { - // Tokens coming from concatenations. - recordTokens(R"cpp( - #define CONCAT(a, b) a ## b - int a = CONCAT(1, 2); - )cpp"); - EXPECT_THAT(std::vector(Buffer.expandedTokens()), - Contains(HasText("12"))); - // Multi-line tokens with slashes at the end. - recordTokens("i\\\nn\\\nt"); - EXPECT_THAT(Buffer.expandedTokens(), - ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")), - Kind(tok::eof))); - // FIXME: test tokens with digraphs and UCN identifiers. -} - -TEST_F(TokenCollectorTest, LateBoundTokens) { - // The parser eventually breaks the first '>>' into two tokens ('>' and '>'), - // but we choose to record them as a single token (for now). - llvm::Annotations Code(R"cpp( - template - struct foo { int a; }; - int bar = foo>]]().a; - int baz = 10 $op[[>>]] 2; - )cpp"); - recordTokens(Code.code()); - EXPECT_THAT(std::vector(Buffer.expandedTokens()), - AllOf(Contains(AllOf(Kind(tok::greatergreater), - RangeIs(Code.range("br")))), - Contains(AllOf(Kind(tok::greatergreater), - RangeIs(Code.range("op")))))); -} - -TEST_F(TokenCollectorTest, DelayedParsing) { - llvm::StringLiteral Code = R"cpp( - struct Foo { - int method() { - // Parser will visit method bodies and initializers multiple times, but - // TokenBuffer should only record the first walk over the tokens; - return 100; - } - int a = 10; - - struct Subclass { - void foo() { - Foo().method(); - } - }; - }; - )cpp"; - std::string ExpectedTokens = - "expanded tokens:\n" - " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct " - "Subclass { void foo ( ) { Foo ( ) . 
-  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
-}
-
-TEST_F(TokenCollectorTest, MultiFile) {
-  addFile("./foo.h", R"cpp(
-    #define ADD(X, Y) X+Y
-    int a = 100;
-    #include "bar.h"
-  )cpp");
-  addFile("./bar.h", R"cpp(
-    int b = ADD(1, 2);
-    #define MULT(X, Y) X*Y
-  )cpp");
-  llvm::StringLiteral Code = R"cpp(
-    #include "foo.h"
-    int c = ADD(1, MULT(2,3));
-  )cpp";
-
-  std::string Expected = R"(expanded tokens:
-  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
-file './input.cpp'
-  spelled tokens:
-    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
-  mappings:
-    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
-    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
-file './foo.h'
-  spelled tokens:
-    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
-  mappings:
-    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
-    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
-file './bar.h'
-  spelled tokens:
-    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
-  mappings:
-    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
-    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
-)";
-
-  EXPECT_EQ(Expected, collectAndDump(Code))
-      << "input: " << Code << "\nresults: " << collectAndDump(Code);
-}
-
-class TokenBufferTest : public TokenCollectorTest {};
-
-TEST_F(TokenBufferTest, SpelledByExpanded) {
-  recordTokens(R"cpp(
-    a1 a2 a3 b1 b2
-  )cpp");
-
-  // Sanity check: expanded and spelled tokens are stored separately.
-  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
-  // Searching for subranges of expanded tokens should give the corresponding
-  // spelled ones.
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
-              ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
-              ValueIs(SameRange(findSpelled("a1 a2 a3"))));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
-              ValueIs(SameRange(findSpelled("b1 b2"))));
-
-  // Test search on simple macro expansions.
-  recordTokens(R"cpp(
-    #define A a1 a2 a3
-    #define B b1 b2
-
-    A split B
-  )cpp");
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
-              ValueIs(SameRange(findSpelled("A split B"))));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
-              ValueIs(SameRange(findSpelled("A split").drop_back())));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
-              ValueIs(SameRange(findSpelled("split B").drop_front())));
-  // Ranges not fully covering macro invocations should fail.
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
-  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
-            llvm::None);
-
-  // Recursive macro invocations.
-  recordTokens(R"cpp(
-    #define ID(x) x
-    #define B b1 b2
-
-    ID(ID(ID(a1) a2 a3)) split ID(B)
-  )cpp");
-
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
-              ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
-              ValueIs(SameRange(findSpelled("ID ( B )"))));
-  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
-              ValueIs(SameRange(findSpelled(
-                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
-  // Ranges crossing macro call boundaries.
- EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")), - llvm::None); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")), - llvm::None); - // FIXME: next two examples should map to macro arguments, but currently they - // fail. - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None); - EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None); - - // Empty macro expansions. - recordTokens(R"cpp( - #define EMPTY - #define ID(X) X - - EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1 - EMPTY EMPTY ID(4 5 6) split2 - ID(7 8 9) EMPTY EMPTY - )cpp"); - EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")), - ValueIs(SameRange(findSpelled("ID ( 1 2 3 )")))); - EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")), - ValueIs(SameRange(findSpelled("ID ( 4 5 6 )")))); - EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")), - ValueIs(SameRange(findSpelled("ID ( 7 8 9 )")))); - - // Empty mappings coming from various directives. - recordTokens(R"cpp( - #define ID(X) X - ID(1) - #pragma lalala - not_mapped - )cpp"); - EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")), - ValueIs(SameRange(findSpelled("not_mapped")))); -} - -TEST_F(TokenBufferTest, TokensToFileRange) { - addFile("./foo.h", "token_from_header"); - llvm::Annotations Code(R"cpp( - #define FOO token_from_expansion - #include "./foo.h" - $all[[$i[[int]] a = FOO;]] - )cpp"); - recordTokens(Code.code()); - - auto &SM = *SourceMgr; - - // Two simple examples. - auto Int = findExpanded("int").front(); - auto Semi = findExpanded(";").front(); - EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin, - Code.range("i").End)); - EXPECT_EQ(syntax::Token::range(SM, Int, Semi), - FileRange(SM.getMainFileID(), Code.range("all").Begin, - Code.range("all").End)); - // We don't test assertion failures because death tests are slow. -} - -} // namespace \ No newline at end of file
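For reference, the standalone tokenize() entry point exercised by the RawMode test above can also be driven outside the test fixture. A condensed, illustrative sketch follows (the helper name is not from the patch; per the header's contract, the preprocessor never runs and no 'eof' token is stored):

    #include "clang/Basic/LangOptions.h"
    #include "clang/Basic/SourceManager.h"
    #include "clang/Tooling/Syntax/Tokens.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include <vector>

    // Raw-mode lexing of an in-memory buffer, as the test fixture does:
    // keywords still get proper kinds via the identifier table.
    std::vector<clang::syntax::Token> rawTokens(llvm::StringRef Code,
                                                clang::SourceManager &SM) {
      clang::FileID FID =
          SM.createFileID(llvm::MemoryBuffer::getMemBufferCopy(Code));
      return clang::syntax::tokenize(FID, SM, clang::LangOptions());
    }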