From c50c6ff49aa3648ae031349de6f09439f52425f0 Mon Sep 17 00:00:00 2001 From: Argyrios Kyrtzidis Date: Thu, 16 May 2013 21:37:39 +0000 Subject: [PATCH] [Lexer] Improve Lexer::getSourceText() when the given range deals with function macro arguments. This is a modified version of a patch by Manuel Klimek. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@182055 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/SourceManager.h | 24 +++ lib/Basic/SourceManager.cpp | 108 +++++++++++ lib/Lex/Lexer.cpp | 57 +++--- unittests/Lex/LexerTest.cpp | 280 ++++++++++++++++++++++------ 4 files changed, 381 insertions(+), 88 deletions(-) diff --git a/include/clang/Basic/SourceManager.h b/include/clang/Basic/SourceManager.h index eccbf1ede7..5fd5a47a4a 100644 --- a/include/clang/Basic/SourceManager.h +++ b/include/clang/Basic/SourceManager.h @@ -1161,6 +1161,22 @@ public: /// expansion but not the expansion of an argument to a function-like macro. bool isMacroBodyExpansion(SourceLocation Loc) const; + /// \brief Returns true if the given MacroID location points at the beginning + /// of the immediate macro expansion. + /// + /// \param MacroBegin If non-null and function returns true, it is set to the + /// begin location of the immediate macro expansion. + bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroBegin = 0) const; + + /// \brief Returns true if the given MacroID location points at the character + /// end of the immediate macro expansion. + /// + /// \param MacroEnd If non-null and function returns true, it is set to the + /// character end location of the immediate macro expansion. + bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroEnd = 0) const; + /// \brief Returns true if \p Loc is inside the [\p Start, +\p Length) /// chunk of the source location address space. /// @@ -1570,6 +1586,14 @@ private: return SLocOffset < getSLocEntryByID(FID.ID+1).getOffset(); } + /// \brief Returns the previous in-order FileID or an invalid FileID if there + /// is no previous one. + FileID getPreviousFileID(FileID FID) const; + + /// \brief Returns the next in-order FileID or an invalid FileID if there is + /// no next one. + FileID getNextFileID(FileID FID) const; + /// \brief Create a new fileID for the specified ContentCache and /// include position. /// diff --git a/lib/Basic/SourceManager.cpp b/lib/Basic/SourceManager.cpp index d6dc6d6328..6994b30093 100644 --- a/lib/Basic/SourceManager.cpp +++ b/lib/Basic/SourceManager.cpp @@ -536,6 +536,43 @@ SourceManager::getFakeContentCacheForRecovery() const { return FakeContentCacheForRecovery; } +/// \brief Returns the previous in-order FileID or an invalid FileID if there +/// is no previous one. +FileID SourceManager::getPreviousFileID(FileID FID) const { + if (FID.isInvalid()) + return FileID(); + + int ID = FID.ID; + if (ID == -1) + return FileID(); + + if (ID > 0) { + if (ID-1 == 0) + return FileID(); + } else if (unsigned(-(ID-1) - 2) >= LoadedSLocEntryTable.size()) { + return FileID(); + } + + return FileID::get(ID-1); +} + +/// \brief Returns the next in-order FileID or an invalid FileID if there is +/// no next one. +FileID SourceManager::getNextFileID(FileID FID) const { + if (FID.isInvalid()) + return FileID(); + + int ID = FID.ID; + if (ID > 0) { + if (unsigned(ID+1) >= local_sloc_entry_size()) + return FileID(); + } else if (ID+1 >= -1) { + return FileID(); + } + + return FileID::get(ID+1); +} + //===----------------------------------------------------------------------===// // Methods to create new FileID's and macro expansions. //===----------------------------------------------------------------------===// @@ -998,6 +1035,77 @@ bool SourceManager::isMacroBodyExpansion(SourceLocation Loc) const { return Expansion.isMacroBodyExpansion(); } +bool SourceManager::isAtStartOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroBegin) const { + assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc"); + + std::pair DecompLoc = getDecomposedLoc(Loc); + if (DecompLoc.second > 0) + return false; // Does not point at the start of expansion range. + + bool Invalid = false; + const SrcMgr::ExpansionInfo &ExpInfo = + getSLocEntry(DecompLoc.first, &Invalid).getExpansion(); + if (Invalid) + return false; + SourceLocation ExpLoc = ExpInfo.getExpansionLocStart(); + + if (ExpInfo.isMacroArgExpansion()) { + // For macro argument expansions, check if the previous FileID is part of + // the same argument expansion, in which case this Loc is not at the + // beginning of the expansion. + FileID PrevFID = getPreviousFileID(DecompLoc.first); + if (!PrevFID.isInvalid()) { + const SrcMgr::SLocEntry &PrevEntry = getSLocEntry(PrevFID, &Invalid); + if (Invalid) + return false; + if (PrevEntry.isExpansion() && + PrevEntry.getExpansion().getExpansionLocStart() == ExpLoc) + return false; + } + } + + if (MacroBegin) + *MacroBegin = ExpLoc; + return true; +} + +bool SourceManager::isAtEndOfImmediateMacroExpansion(SourceLocation Loc, + SourceLocation *MacroEnd) const { + assert(Loc.isValid() && Loc.isMacroID() && "Expected a valid macro loc"); + + FileID FID = getFileID(Loc); + SourceLocation NextLoc = Loc.getLocWithOffset(1); + if (isInFileID(NextLoc, FID)) + return false; // Does not point at the end of expansion range. + + bool Invalid = false; + const SrcMgr::ExpansionInfo &ExpInfo = + getSLocEntry(FID, &Invalid).getExpansion(); + if (Invalid) + return false; + + if (ExpInfo.isMacroArgExpansion()) { + // For macro argument expansions, check if the next FileID is part of the + // same argument expansion, in which case this Loc is not at the end of the + // expansion. + FileID NextFID = getNextFileID(FID); + if (!NextFID.isInvalid()) { + const SrcMgr::SLocEntry &NextEntry = getSLocEntry(NextFID, &Invalid); + if (Invalid) + return false; + if (NextEntry.isExpansion() && + NextEntry.getExpansion().getExpansionLocStart() == + ExpInfo.getExpansionLocStart()) + return false; + } + } + + if (MacroEnd) + *MacroEnd = ExpInfo.getExpansionLocEnd(); + return true; +} + //===----------------------------------------------------------------------===// // Queries about the code at a SourceLocation. diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 66498b1a2c..e58581ee06 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -798,14 +798,10 @@ bool Lexer::isAtStartOfMacroExpansion(SourceLocation loc, SourceLocation *MacroBegin) { assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc"); - std::pair infoLoc = SM.getDecomposedLoc(loc); - // FIXME: If the token comes from the macro token paste operator ('##') - // this function will always return false; - if (infoLoc.second > 0) - return false; // Does not point at the start of token. - - SourceLocation expansionLoc = - SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart(); + SourceLocation expansionLoc; + if (!SM.isAtStartOfImmediateMacroExpansion(loc, &expansionLoc)) + return false; + if (expansionLoc.isFileID()) { // No other macro expansions, this is the first. if (MacroBegin) @@ -829,16 +825,11 @@ bool Lexer::isAtEndOfMacroExpansion(SourceLocation loc, if (tokLen == 0) return false; - FileID FID = SM.getFileID(loc); - SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1); - if (SM.isInFileID(afterLoc, FID)) - return false; // Still in the same FileID, does not point to the last token. - - // FIXME: If the token comes from the macro token paste operator ('##') - // or the stringify operator ('#') this function will always return false; + SourceLocation afterLoc = loc.getLocWithOffset(tokLen); + SourceLocation expansionLoc; + if (!SM.isAtEndOfImmediateMacroExpansion(afterLoc, &expansionLoc)) + return false; - SourceLocation expansionLoc = - SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd(); if (expansionLoc.isFileID()) { // No other macro expansions. if (MacroEnd) @@ -916,25 +907,25 @@ CharSourceRange Lexer::makeFileCharRange(CharSourceRange Range, return makeRangeFromFileLocs(Range, SM, LangOpts); } - FileID FID; - unsigned BeginOffs; - llvm::tie(FID, BeginOffs) = SM.getDecomposedLoc(Begin); - if (FID.isInvalid()) + bool Invalid = false; + const SrcMgr::SLocEntry &BeginEntry = SM.getSLocEntry(SM.getFileID(Begin), + &Invalid); + if (Invalid) return CharSourceRange(); - unsigned EndOffs; - if (!SM.isInFileID(End, FID, &EndOffs) || - BeginOffs > EndOffs) - return CharSourceRange(); + if (BeginEntry.getExpansion().isMacroArgExpansion()) { + const SrcMgr::SLocEntry &EndEntry = SM.getSLocEntry(SM.getFileID(End), + &Invalid); + if (Invalid) + return CharSourceRange(); - const SrcMgr::SLocEntry *E = &SM.getSLocEntry(FID); - const SrcMgr::ExpansionInfo &Expansion = E->getExpansion(); - if (Expansion.isMacroArgExpansion() && - Expansion.getSpellingLoc().isFileID()) { - SourceLocation SpellLoc = Expansion.getSpellingLoc(); - Range.setBegin(SpellLoc.getLocWithOffset(BeginOffs)); - Range.setEnd(SpellLoc.getLocWithOffset(EndOffs)); - return makeRangeFromFileLocs(Range, SM, LangOpts); + if (EndEntry.getExpansion().isMacroArgExpansion() && + BeginEntry.getExpansion().getExpansionLocStart() == + EndEntry.getExpansion().getExpansionLocStart()) { + Range.setBegin(SM.getImmediateSpellingLoc(Begin)); + Range.setEnd(SM.getImmediateSpellingLoc(End)); + return makeFileCharRange(Range, SM, LangOpts); + } } return CharSourceRange(); diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp index c9b1840e1c..a8e25cb2a3 100644 --- a/unittests/Lex/LexerTest.cpp +++ b/unittests/Lex/LexerTest.cpp @@ -28,6 +28,20 @@ using namespace clang; namespace { +class VoidModuleLoader : public ModuleLoader { + virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, + ModuleIdPath Path, + Module::NameVisibilityKind Visibility, + bool IsInclusionDirective) { + return ModuleLoadResult(); + } + + virtual void makeModuleVisible(Module *Mod, + Module::NameVisibilityKind Visibility, + SourceLocation ImportLoc, + bool Complain) { } +}; + // The test fixture. class LexerTest : public ::testing::Test { protected: @@ -42,6 +56,48 @@ protected: Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts); } + std::vector CheckLex(StringRef Source, + ArrayRef ExpectedTokens) { + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(Source); + (void) SourceMgr.createMainFileIDForMemBuffer(buf); + + VoidModuleLoader ModLoader; + HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts, + Target.getPtr()); + Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(), + SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/ 0, + /*OwnsHeaderSearch =*/ false, + /*DelayInitialization =*/ false); + PP.EnterMainSourceFile(); + + std::vector toks; + while (1) { + Token tok; + PP.Lex(tok); + if (tok.is(tok::eof)) + break; + toks.push_back(tok); + } + + EXPECT_EQ(ExpectedTokens.size(), toks.size()); + for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) { + EXPECT_EQ(ExpectedTokens[i], toks[i].getKind()); + } + + return toks; + } + + std::string getSourceText(Token Begin, Token End) { + bool Invalid; + StringRef Str = + Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange( + Begin.getLocation(), End.getLocation())), + SourceMgr, LangOpts, &Invalid); + if (Invalid) + return ""; + return Str; + } + FileSystemOptions FileMgrOpts; FileManager FileMgr; IntrusiveRefCntPtr DiagID; @@ -52,65 +108,179 @@ protected: IntrusiveRefCntPtr Target; }; -class VoidModuleLoader : public ModuleLoader { - virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, - ModuleIdPath Path, - Module::NameVisibilityKind Visibility, - bool IsInclusionDirective) { - return ModuleLoadResult(); - } +TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); - virtual void makeModuleVisible(Module *Mod, - Module::NameVisibilityKind Visibility, - SourceLocation ImportLoc, - bool Complain) { } -}; + std::vector toks = CheckLex("#define M(x) x\n" + "M(f(M(i)))", + ExpectedTokens); + + EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(M(i) c)", + ExpectedTokens); + + EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0])); +} + +TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(c c M(i))", + ExpectedTokens); + + EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(M(i) c c)", + ExpectedTokens); + + EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1])); +} + +TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(c M(i)) M(M(i) c)", + ExpectedTokens); + + EXPECT_EQ("", getSourceText(toks[1], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) M(x##c)\n" + "M(f(C(i)))", + ExpectedTokens); + + EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "f(M(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "M(f(i))", + ExpectedTokens); + EXPECT_EQ("i", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) x\n" + "f(C(M(i)))", + ExpectedTokens); + EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2])); +} + +TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) c x\n" + "f(C(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); +} + +TEST_F(LexerTest, GetSourceTextExpandsRecursively) { + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::l_paren); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_paren); + + std::vector toks = CheckLex("#define M(x) x\n" + "#define C(x) c M(x)\n" + "C(f(M(i)))", + ExpectedTokens); + EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3])); +} TEST_F(LexerTest, LexAPI) { - const char *source = - "#define M(x) [x]\n" - "#define N(x) x\n" - "#define INN(x) x\n" - "#define NOF1 INN(val)\n" - "#define NOF2 val\n" - "M(foo) N([bar])\n" - "N(INN(val)) N(NOF1) N(NOF2) N(val)"; - - MemoryBuffer *buf = MemoryBuffer::getMemBuffer(source); - (void)SourceMgr.createMainFileIDForMemBuffer(buf); - - VoidModuleLoader ModLoader; - HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts, - Target.getPtr()); - Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(), - SourceMgr, HeaderInfo, ModLoader, - /*IILookup =*/ 0, - /*OwnsHeaderSearch =*/false, - /*DelayInitialization =*/ false); - PP.EnterMainSourceFile(); - - std::vector toks; - while (1) { - Token tok; - PP.Lex(tok); - if (tok.is(tok::eof)) - break; - toks.push_back(tok); - } + std::vector ExpectedTokens; + ExpectedTokens.push_back(tok::l_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_square); + ExpectedTokens.push_back(tok::l_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::r_square); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + ExpectedTokens.push_back(tok::identifier); + + std::vector toks = CheckLex("#define M(x) [x]\n" + "#define N(x) x\n" + "#define INN(x) x\n" + "#define NOF1 INN(val)\n" + "#define NOF2 val\n" + "M(foo) N([bar])\n" + "N(INN(val)) N(NOF1) N(NOF2) N(val)", + ExpectedTokens); - // Make sure we got the tokens that we expected. - ASSERT_EQ(10U, toks.size()); - ASSERT_EQ(tok::l_square, toks[0].getKind()); - ASSERT_EQ(tok::identifier, toks[1].getKind()); - ASSERT_EQ(tok::r_square, toks[2].getKind()); - ASSERT_EQ(tok::l_square, toks[3].getKind()); - ASSERT_EQ(tok::identifier, toks[4].getKind()); - ASSERT_EQ(tok::r_square, toks[5].getKind()); - ASSERT_EQ(tok::identifier, toks[6].getKind()); - ASSERT_EQ(tok::identifier, toks[7].getKind()); - ASSERT_EQ(tok::identifier, toks[8].getKind()); - ASSERT_EQ(tok::identifier, toks[9].getKind()); - SourceLocation lsqrLoc = toks[0].getLocation(); SourceLocation idLoc = toks[1].getLocation(); SourceLocation rsqrLoc = toks[2].getLocation(); -- 2.40.0