From 2d44d77fed3200e2eff289f55493317e90d3398c Mon Sep 17 00:00:00 2001 From: Dmitri Gribenko Date: Tue, 26 Jun 2012 20:39:18 +0000 Subject: [PATCH] Implement a lexer for structured comments. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@159223 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang-c/Index.h | 6 + include/clang/AST/CommentBriefParser.h | 49 ++ include/clang/AST/CommentLexer.h | 352 +++++++++ include/clang/AST/RawCommentList.h | 25 +- lib/AST/ASTContext.cpp | 4 +- lib/AST/CMakeLists.txt | 2 + lib/AST/CommentBriefParser.cpp | 76 ++ lib/AST/CommentLexer.cpp | 676 ++++++++++++++++ lib/AST/RawCommentList.cpp | 21 + test/Index/annotate-comments.cpp | 39 +- tools/c-index-test/c-index-test.c | 50 +- tools/libclang/CIndex.cpp | 18 + tools/libclang/libclang.exports | 1 + unittests/AST/CMakeLists.txt | 7 + unittests/AST/CommentLexer.cpp | 1010 ++++++++++++++++++++++++ unittests/AST/Makefile | 15 + unittests/Makefile | 2 +- 17 files changed, 2326 insertions(+), 27 deletions(-) create mode 100644 include/clang/AST/CommentBriefParser.h create mode 100644 include/clang/AST/CommentLexer.h create mode 100644 lib/AST/CommentBriefParser.cpp create mode 100644 lib/AST/CommentLexer.cpp create mode 100644 unittests/AST/CMakeLists.txt create mode 100644 unittests/AST/CommentLexer.cpp create mode 100644 unittests/AST/Makefile diff --git a/include/clang-c/Index.h b/include/clang-c/Index.h index b7bd8bb738..2397ae1925 100644 --- a/include/clang-c/Index.h +++ b/include/clang-c/Index.h @@ -3200,6 +3200,12 @@ CINDEX_LINKAGE CXSourceRange clang_Cursor_getCommentRange(CXCursor C); */ CINDEX_LINKAGE CXString clang_Cursor_getRawCommentText(CXCursor C); +/** + * \brief Given a cursor that represents a declaration, return the associated + * \\brief paragraph; otherwise return the first paragraph. 
+ */ +CINDEX_LINKAGE CXString clang_Cursor_getBriefCommentText(CXCursor C); + /** * @} */ diff --git a/include/clang/AST/CommentBriefParser.h b/include/clang/AST/CommentBriefParser.h new file mode 100644 index 0000000000..e343b94643 --- /dev/null +++ b/include/clang/AST/CommentBriefParser.h @@ -0,0 +1,49 @@ +//===--- CommentBriefParser.h - Dumb comment parser -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a very simple Doxygen comment parser. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_CLANG_AST_BRIEF_COMMENT_PARSER_H +#define LLVM_CLANG_AST_BRIEF_COMMENT_PARSER_H + +#include "clang/AST/CommentLexer.h" + +namespace clang { +namespace comments { + +/// A very simple comment parser that extracts just the brief description or +/// first paragraph. +class BriefParser { + Lexer &L; + + /// Current lookahead token. + Token Tok; + + SourceLocation ConsumeToken() { + SourceLocation Loc = Tok.getLocation(); + L.lex(Tok); + return Loc; + } + +public: + BriefParser(Lexer &L); + + /// Return \\brief paragraph, if it exists; otherwise return the first + /// paragraph. + std::string Parse(); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/CommentLexer.h b/include/clang/AST/CommentLexer.h new file mode 100644 index 0000000000..7f7ae62758 --- /dev/null +++ b/include/clang/AST/CommentLexer.h @@ -0,0 +1,352 @@ +//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines lexer for structured comments and supporting token class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_COMMENT_LEXER_H +#define LLVM_CLANG_AST_COMMENT_LEXER_H + +#include "clang/Basic/SourceManager.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" + +namespace clang { +namespace comments { + +class Lexer; + +namespace tok { +enum TokenKind { + eof, + newline, + text, + command, + verbatim_block_begin, + verbatim_block_line, + verbatim_block_end, + verbatim_line, + html_tag_open, // + html_tag_close, // + + // Markdown tokens (not supported yet). + ruler, + md_code_line, // Line indented at least by 4 spaces. + md_code_inline, // `code` + md_emph, // _text_ or *text* + md_strong, // __text__ or **text** + md_header // ### level 3 header ### +}; +} // end namespace tok + +class CommentOptions { +public: + bool Markdown; +}; + +/// \brief Comment token. +class Token { + friend class Lexer; + + /// The location of the token. + SourceLocation Loc; + + /// The actual kind of the token. + tok::TokenKind Kind; + + /// Length of the token spelling in comment. Can be 0 for synthesized + /// tokens. + unsigned Length; + + /// Primary text value associated with a token (e.g. text, command name). + const char *TextPtr1; + unsigned TextLen1; + + /// Secondary text value associated with a token (verbatim line text). 
+ const char *TextPtr2; + unsigned TextLen2; + +public: + SourceLocation getLocation() const LLVM_READONLY { return Loc; } + void setLocation(SourceLocation SL) { Loc = SL; } + + tok::TokenKind getKind() const LLVM_READONLY { return Kind; } + void setKind(tok::TokenKind K) { Kind = K; } + + bool is(tok::TokenKind K) const LLVM_READONLY { return Kind == K; } + bool isNot(tok::TokenKind K) const LLVM_READONLY { return Kind != K; } + + unsigned getLength() const LLVM_READONLY { return Length; } + void setLength(unsigned L) { Length = L; } + + StringRef getText() const LLVM_READONLY { + assert(is(tok::text)); + return StringRef(TextPtr1, TextLen1); + } + + void setText(StringRef Text) { + assert(is(tok::text)); + TextPtr1 = Text.data(); + TextLen1 = Text.size(); + } + + StringRef getCommandName() const LLVM_READONLY { + assert(is(tok::command)); + return StringRef(TextPtr1, TextLen1); + } + + void setCommandName(StringRef Name) { + assert(is(tok::command)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + StringRef getVerbatimBlockName() const LLVM_READONLY { + assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); + return StringRef(TextPtr1, TextLen1); + } + + void setVerbatimBlockName(StringRef Name) { + assert(is(tok::verbatim_block_begin) || is(tok::verbatim_block_end)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + StringRef getVerbatimBlockText() const LLVM_READONLY { + assert(is(tok::verbatim_block_line)); + return StringRef(TextPtr1, TextLen1); + } + + void setVerbatimBlockText(StringRef Text) { + assert(is(tok::verbatim_block_line)); + TextPtr1 = Text.data(); + TextLen1 = Text.size(); + } + + /// Returns the name of verbatim line command. 
+ StringRef getVerbatimLineName() const LLVM_READONLY { + assert(is(tok::verbatim_line)); + return StringRef(TextPtr1, TextLen1); + } + + void setVerbatimLineName(StringRef Name) { + assert(is(tok::verbatim_line)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + StringRef getVerbatimLineText() const LLVM_READONLY { + assert(is(tok::verbatim_line)); + return StringRef(TextPtr2, TextLen2); + } + + void setVerbatimLineText(StringRef Text) { + assert(is(tok::verbatim_line)); + TextPtr2 = Text.data(); + TextLen2 = Text.size(); + } + + StringRef getHTMLTagOpenName() const LLVM_READONLY { + assert(is(tok::html_tag_open)); + return StringRef(TextPtr1, TextLen1); + } + + void setHTMLTagOpenName(StringRef Name) { + assert(is(tok::html_tag_open)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + StringRef getHTMLIdent() const LLVM_READONLY { + assert(is(tok::html_ident)); + return StringRef(TextPtr1, TextLen1); + } + + void setHTMLIdent(StringRef Name) { + assert(is(tok::html_ident)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + StringRef getHTMLQuotedString() const LLVM_READONLY { + assert(is(tok::html_quoted_string)); + return StringRef(TextPtr1, TextLen1); + } + + void setHTMLQuotedString(StringRef Str) { + assert(is(tok::html_quoted_string)); + TextPtr1 = Str.data(); + TextLen1 = Str.size(); + } + + StringRef getHTMLTagCloseName() const LLVM_READONLY { + assert(is(tok::html_tag_close)); + return StringRef(TextPtr1, TextLen1); + } + + void setHTMLTagCloseName(StringRef Name) { + assert(is(tok::html_tag_close)); + TextPtr1 = Name.data(); + TextLen1 = Name.size(); + } + + void dump(const Lexer &L, const SourceManager &SM) const; +}; + +/// \brief Comment lexer. 
+class Lexer { +private: + Lexer(const Lexer&); // DO NOT IMPLEMENT + void operator=(const Lexer&); // DO NOT IMPLEMENT + + const char *const BufferStart; + const char *const BufferEnd; + SourceLocation FileLoc; + CommentOptions CommOpts; + + const char *BufferPtr; + + /// One past end pointer for the current comment. For BCPL comments points + /// to newline or BufferEnd, for C comments points to star in '*/'. + const char *CommentEnd; + + enum LexerCommentState { + LCS_BeforeComment, + LCS_InsideBCPLComment, + LCS_InsideCComment, + LCS_BetweenComments + }; + + /// Low-level lexer state, track if we are inside or outside of comment. + LexerCommentState CommentState; + + enum LexerState { + /// Lexing normal comment text + LS_Normal, + + /// Finished lexing verbatim block beginning command, will lex first body + /// line. + LS_VerbatimBlockFirstLine, + + /// Lexing verbatim block body line-by-line, skipping line-starting + /// decorations. + LS_VerbatimBlockBody, + + /// Finished lexing \verbatim VerbatimBlockCommandVector; + + /// Registered verbatim-like block commands. + VerbatimBlockCommandVector VerbatimBlockCommands; + + /// If State is LS_VerbatimBlock, contains the the name of verbatim end + /// command, including command marker. + SmallString<16> VerbatimBlockEndCommandName; + + bool isVerbatimBlockCommand(StringRef BeginName, StringRef &EndName) const; + + /// A verbatim-like line command eats everything until a newline is seen or + /// comment end is hit. + struct VerbatimLineCommand { + StringRef Name; + }; + + typedef SmallVector VerbatimLineCommandVector; + + /// Registered verbatim-like line commands. 
+ VerbatimLineCommandVector VerbatimLineCommands; + + bool isVerbatimLineCommand(StringRef Name) const; + + /// Form a token of kind \p Kind spanning [BufferPtr, TokEnd) and advance + /// BufferPtr to \p TokEnd. + void formTokenWithChars(Token &Result, const char *TokEnd, + tok::TokenKind Kind) { + const unsigned TokLen = TokEnd - BufferPtr; + Result.setLocation(getSourceLocation(BufferPtr)); + Result.setKind(Kind); + Result.setLength(TokLen); +#ifndef NDEBUG + // Reset the text fields to a recognizable placeholder; the recorded length + // must match the literal (7 characters). + Result.TextPtr1 = "<UNSET>"; + Result.TextLen1 = 7; + Result.TextPtr2 = "<UNSET>"; + Result.TextLen2 = 7; +#endif + BufferPtr = TokEnd; + } + + SourceLocation getSourceLocation(const char *Loc) const { + assert(Loc >= BufferStart && Loc <= BufferEnd && + "Location out of range for this buffer!"); + + const unsigned CharNo = Loc - BufferStart; + return FileLoc.getLocWithOffset(CharNo); + } + + /// Eat string matching regexp \code \s*\* \endcode. + void skipLineStartingDecorations(); + + /// Lex stuff inside comments. CommentEnd should be set correctly. + void lexCommentText(Token &T); + + void setupAndLexVerbatimBlock(Token &T, + const char *TextBegin, + char Marker, StringRef EndName); + + void lexVerbatimBlockFirstLine(Token &T); + + void lexVerbatimBlockBody(Token &T); + + void lexVerbatimLine(Token &T, const char *TextBegin); + + void setupAndLexHTMLOpenTag(Token &T); + + void lexHTMLOpenTag(Token &T); + + void lexHTMLCloseTag(Token &T); + +public: + Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts, + const char *BufferStart, const char *BufferEnd); + + void lex(Token &T); + + StringRef getSpelling(const Token &Tok, + const SourceManager &SourceMgr, + bool *Invalid = NULL) const; + + /// \brief Register a new verbatim block command. + void addVerbatimBlockCommand(StringRef BeginName, StringRef EndName); + + /// \brief Register a new verbatim line command. 
+ void addVerbatimLineCommand(StringRef Name); +}; + +} // end namespace comments +} // end namespace clang + +#endif + diff --git a/include/clang/AST/RawCommentList.h b/include/clang/AST/RawCommentList.h index 0965cb3a62..d670fd1428 100644 --- a/include/clang/AST/RawCommentList.h +++ b/include/clang/AST/RawCommentList.h @@ -15,6 +15,7 @@ namespace clang { +class ASTContext; class ASTReader; class RawComment { @@ -27,7 +28,7 @@ public: CK_BCPLExcl, ///< \code //! stuff \endcode CK_JavaDoc, ///< \code /** stuff */ \endcode CK_Qt, ///< \code /*! stuff */ \endcode, also used by HeaderDoc - CK_Merged ///< Two or more Doxygen comments merged together + CK_Merged ///< Two or more documentation comments merged together }; RawComment() : Kind(CK_Invalid), IsAlmostTrailingComment(false) { } @@ -53,7 +54,7 @@ public: /// \code /**< stuff */ \endcode /// \code /*!< stuff */ \endcode bool isTrailingComment() const LLVM_READONLY { - assert(isDoxygen()); + assert(isDocumentation()); return IsTrailingComment; } @@ -64,13 +65,13 @@ public: return IsAlmostTrailingComment; } - /// Returns true if this comment is not a Doxygen comment. + /// Returns true if this comment is not a documentation comment. bool isOrdinary() const LLVM_READONLY { return (Kind == CK_OrdinaryBCPL) || (Kind == CK_OrdinaryC); } - /// Returns true if this comment any kind of a Doxygen comment. - bool isDoxygen() const LLVM_READONLY { + /// Returns true if this comment any kind of a documentation comment. 
+ bool isDocumentation() const LLVM_READONLY { return !isInvalid() && !isOrdinary(); } @@ -91,11 +92,21 @@ public: unsigned getBeginLine(const SourceManager &SM) const; unsigned getEndLine(const SourceManager &SM) const; + StringRef getBriefText(const ASTContext &Context) const { + if (BriefTextValid) + return BriefText; + + return extractBriefText(Context); + } + private: SourceRange Range; mutable StringRef RawText; - mutable bool RawTextValid : 1; ///< True if RawText is valid + mutable StringRef BriefText; + + mutable bool RawTextValid : 1; ///< True if RawText is valid + mutable bool BriefTextValid : 1; ///< True if BriefText is valid unsigned Kind : 3; @@ -118,6 +129,8 @@ private: StringRef getRawTextSlow(const SourceManager &SourceMgr) const; + StringRef extractBriefText(const ASTContext &Context) const; + friend class ASTReader; }; diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp index 23751a56f2..ca631736d9 100644 --- a/lib/AST/ASTContext.cpp +++ b/lib/AST/ASTContext.cpp @@ -90,7 +90,7 @@ const RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { // First check whether we have a trailing comment. if (Comment != RawComments.end() && - Comment->isDoxygen() && Comment->isTrailingComment() && + Comment->isDocumentation() && Comment->isTrailingComment() && !isa(D) && !isa(D)) { std::pair CommentBeginDecomp = SourceMgr.getDecomposedLoc(Comment->getSourceRange().getBegin()); @@ -111,7 +111,7 @@ const RawComment *ASTContext::getRawCommentForDeclNoCache(const Decl *D) const { --Comment; // Check that we actually have a non-member Doxygen comment. - if (!Comment->isDoxygen() || Comment->isTrailingComment()) + if (!Comment->isDocumentation() || Comment->isTrailingComment()) return NULL; // Decompose the end of the comment. 
diff --git a/lib/AST/CMakeLists.txt b/lib/AST/CMakeLists.txt index d8605367a7..5dad60c490 100644 --- a/lib/AST/CMakeLists.txt +++ b/lib/AST/CMakeLists.txt @@ -8,6 +8,8 @@ add_clang_library(clangAST ASTImporter.cpp AttrImpl.cpp CXXInheritance.cpp + CommentBriefParser.cpp + CommentLexer.cpp Decl.cpp DeclarationName.cpp DeclBase.cpp diff --git a/lib/AST/CommentBriefParser.cpp b/lib/AST/CommentBriefParser.cpp new file mode 100644 index 0000000000..528fd2606f --- /dev/null +++ b/lib/AST/CommentBriefParser.cpp @@ -0,0 +1,76 @@ +//===--- CommentBriefParser.cpp - Dumb comment parser ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/CommentBriefParser.h" + +namespace clang { +namespace comments { + +std::string BriefParser::Parse() { + std::string FirstParagraph; + std::string Brief; + bool InFirstParagraph = true; + bool InBrief = false; + bool BriefDone = false; + + while (Tok.isNot(tok::eof)) { + if (Tok.is(tok::text)) { + if (InFirstParagraph) + FirstParagraph += Tok.getText(); + if (InBrief) + Brief += Tok.getText(); + ConsumeToken(); + continue; + } + + if (!BriefDone && Tok.is(tok::command) && Tok.getCommandName() == "brief") { + InBrief = true; + ConsumeToken(); + continue; + } + + if (Tok.is(tok::newline)) { + if (InFirstParagraph) + FirstParagraph += '\n'; + if (InBrief) + Brief += '\n'; + ConsumeToken(); + + if (Tok.is(tok::newline)) { + ConsumeToken(); + // We found a paragraph end. + InFirstParagraph = false; + if (InBrief) { + InBrief = false; + BriefDone = true; + } + } + continue; + } + + // We didn't handle this token, so just drop it. + ConsumeToken(); + } + + if (Brief.size() > 0) + return Brief; + + return FirstParagraph; +} + +BriefParser::BriefParser(Lexer &L) : L(L) +{ + // Get lookahead token. 
+ ConsumeToken(); +} + +} // end namespace comments +} // end namespace clang + + diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp new file mode 100644 index 0000000000..e5529dad15 --- /dev/null +++ b/lib/AST/CommentLexer.cpp @@ -0,0 +1,676 @@ +#include "clang/AST/CommentLexer.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorHandling.h" + +namespace clang { +namespace comments { + +void Token::dump(const Lexer &L, const SourceManager &SM) const { + llvm::errs() << "comments::Token Kind=" << Kind << " "; + Loc.dump(SM); + llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; +} + +bool Lexer::isVerbatimBlockCommand(StringRef BeginName, + StringRef &EndName) const { + const char *Result = llvm::StringSwitch(BeginName) + .Case("code", "endcode") + .Case("verbatim", "endverbatim") + .Case("htmlonly", "endhtmlonly") + .Case("latexonly", "endlatexonly") + .Case("xmlonly", "endxmlonly") + .Case("manonly", "endmanonly") + .Case("rtfonly", "endrtfonly") + + .Case("dot", "enddot") + .Case("msc", "endmsc") + + .Case("f$", "f$") // Inline LaTeX formula + .Case("f[", "f]") // Displayed LaTeX formula + .Case("f{", "f}") // LaTeX environment + + .Default(NULL); + + if (Result) { + EndName = Result; + return true; + } + + for (VerbatimBlockCommandVector::const_iterator + I = VerbatimBlockCommands.begin(), + E = VerbatimBlockCommands.end(); + I != E; ++I) + if (I->BeginName == BeginName) { + EndName = I->EndName; + return true; + } + + return false; +} + +bool Lexer::isVerbatimLineCommand(StringRef Name) const { + bool Result = llvm::StringSwitch(Name) + .Case("fn", true) + .Case("var", true) + .Case("property", true) + .Case("typedef", true) + + .Case("overload", true) + + .Case("defgroup", true) + .Case("ingroup", true) + .Case("addtogroup", true) + .Case("weakgroup", true) + .Case("name", true) + + .Case("section", true) + .Case("subsection", true) + .Case("subsubsection", true) + .Case("paragraph", true) + + 
.Case("mainpage", true) + .Case("subpage", true) + .Case("ref", true) + + .Default(false); + + if (Result) + return true; + + for (VerbatimLineCommandVector::const_iterator + I = VerbatimLineCommands.begin(), + E = VerbatimLineCommands.end(); + I != E; ++I) + if (I->Name == Name) + return true; + + return false; +} + +void Lexer::skipLineStartingDecorations() { + // This function should be called only for C comments + assert(CommentState == LCS_InsideCComment); + + if (BufferPtr == CommentEnd) + return; + + switch (*BufferPtr) { + case ' ': + case '\t': + case '\f': + case '\v': { + const char *NewBufferPtr = BufferPtr; + NewBufferPtr++; + if (NewBufferPtr == CommentEnd) + return; + + char C = *NewBufferPtr; + while (C == ' ' || C == '\t' || C == '\f' || C == '\v') { + NewBufferPtr++; + if (NewBufferPtr == CommentEnd) + return; + C = *NewBufferPtr; + } + if (C == '*') + BufferPtr = NewBufferPtr + 1; + break; + } + case '*': + BufferPtr++; + break; + } +} + +namespace { +const char *findNewline(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + const char C = *BufferPtr; + if (C == '\n' || C == '\r') + return BufferPtr; + } + return BufferEnd; +} + +const char *skipNewline(const char *BufferPtr, const char *BufferEnd) { + if (BufferPtr == BufferEnd) + return BufferPtr; + + if (*BufferPtr == '\n') + BufferPtr++; + else { + assert(*BufferPtr == '\r'); + BufferPtr++; + if (BufferPtr != BufferEnd && *BufferPtr == '\n') + BufferPtr++; + } + return BufferPtr; +} + +bool isHTMLIdentifierCharacter(char C) { + return (C >= 'a' && C <= 'z') || + (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9'); +} + +const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + if (!isHTMLIdentifierCharacter(*BufferPtr)) + return BufferPtr; + } + return BufferEnd; +} + +/// Skip HTML string quoted in single or double quotes. Escaping quotes inside +/// string allowed. 
+/// +/// Returns pointer to closing quote. +const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd) +{ + const char Quote = *BufferPtr; + assert(Quote == '\"' || Quote == '\''); + + BufferPtr++; + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + const char C = *BufferPtr; + if (C == Quote && BufferPtr[-1] != '\\') + return BufferPtr; + } + return BufferEnd; +} + +bool isHorizontalWhitespace(char C) { + return C == ' ' || C == '\t' || C == '\f' || C == '\v'; +} + +bool isWhitespace(char C) { + return C == ' ' || C == '\n' || C == '\r' || + C == '\t' || C == '\f' || C == '\v'; +} + +const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + if (!isWhitespace(*BufferPtr)) + return BufferPtr; + } + return BufferEnd; +} + +bool isCommandNameCharacter(char C) { + return (C >= 'a' && C <= 'z') || + (C >= 'A' && C <= 'Z') || + (C >= '0' && C <= '9'); +} + +const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + if (!isCommandNameCharacter(*BufferPtr)) + return BufferPtr; + } + return BufferEnd; +} + +/// Return the one past end pointer for BCPL comments. +/// Handles newlines escaped with backslash or trigraph for backslahs. +const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) { + const char *CurPtr = BufferPtr; + while (CurPtr != BufferEnd) { + char C = *CurPtr; + while (C != '\n' && C != '\r') { + CurPtr++; + if (CurPtr == BufferEnd) + return BufferEnd; + C = *CurPtr; + } + // We found a newline, check if it is escaped. + const char *EscapePtr = CurPtr - 1; + while(isHorizontalWhitespace(*EscapePtr)) + EscapePtr--; + + if (*EscapePtr == '\\' || + (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' && + EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) { + // We found an escaped newline. + CurPtr = skipNewline(CurPtr, BufferEnd); + } else + return CurPtr; // Not an escaped newline. 
+ } + return BufferEnd; +} + +/// Return the one past end pointer for C comments. +/// Very dumb, does not handle escaped newlines or trigraphs. +const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) { + for ( ; BufferPtr != BufferEnd; ++BufferPtr) { + if (*BufferPtr == '*') { + assert(BufferPtr + 1 != BufferEnd); + if (*(BufferPtr + 1) == '/') + return BufferPtr; + } + } + llvm_unreachable("buffer end hit before '*/' was seen"); +} +} // unnamed namespace + +void Lexer::lexCommentText(Token &T) { + assert(CommentState == LCS_InsideBCPLComment || + CommentState == LCS_InsideCComment); + + switch (State) { + case LS_Normal: + break; + case LS_VerbatimBlockFirstLine: + lexVerbatimBlockFirstLine(T); + return; + case LS_VerbatimBlockBody: + lexVerbatimBlockBody(T); + return; + case LS_HTMLOpenTag: + lexHTMLOpenTag(T); + return; + } + + assert(State == LS_Normal); + + const char *TokenPtr = BufferPtr; + assert(TokenPtr < CommentEnd); + while (TokenPtr != CommentEnd) { + switch(*TokenPtr) { + case '\\': + case '@': { + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength())); + return; + } + char C = *TokenPtr; + switch (C) { + default: + break; + + case '\\': case '@': case '&': case '$': + case '#': case '<': case '>': case '%': + case '\"': case '.': case ':': + // This is one of \\ \@ \& \$ etc escape sequences. + TokenPtr++; + if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { + // This is the \:: escape sequence. + TokenPtr++; + } + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - (T.getLength() - 1), + T.getLength() - 1)); + return; + } + + // Don't make zero-length commands. 
+ if (!isCommandNameCharacter(*TokenPtr)) { + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength())); + return; + } + + TokenPtr = skipCommandName(TokenPtr, CommentEnd); + unsigned Length = TokenPtr - (BufferPtr + 1); + + // Hardcoded support for lexing LaTeX formula commands + // \f$ \f[ \f] \f{ \f} as a single command. + if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { + C = *TokenPtr; + if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { + TokenPtr++; + Length++; + } + } + + const StringRef CommandName(BufferPtr + 1, Length); + StringRef EndName; + + if (isVerbatimBlockCommand(CommandName, EndName)) { + setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, EndName); + return; + } + if (isVerbatimLineCommand(CommandName)) { + lexVerbatimLine(T, TokenPtr); + return; + } + formTokenWithChars(T, TokenPtr, tok::command); + T.setCommandName(CommandName); + return; + } + + case '<': { + TokenPtr++; + if (TokenPtr == CommentEnd) { + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength())); + return; + } + const char C = *TokenPtr; + if (isHTMLIdentifierCharacter(C)) + setupAndLexHTMLOpenTag(T); + else if (C == '/') + lexHTMLCloseTag(T); + else { + // A '<' that starts neither an opening nor a closing tag is plain + // text. Form a one-character text token so that every call produces + // a token and advances BufferPtr. + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength())); + } + return; + } + + case '\n': + case '\r': + TokenPtr = skipNewline(TokenPtr, CommentEnd); + formTokenWithChars(T, TokenPtr, tok::newline); + + if (CommentState == LCS_InsideCComment) + skipLineStartingDecorations(); + return; + + default: { + while (true) { + TokenPtr++; + if (TokenPtr == CommentEnd) + break; + char C = *TokenPtr; + if(C == '\n' || C == '\r' || + C == '\\' || C == '@' || C == '<') + break; + } + formTokenWithChars(T, TokenPtr, tok::text); + T.setText(StringRef(BufferPtr - T.getLength(), T.getLength())); + return; + } + } + } +} + +void Lexer::setupAndLexVerbatimBlock(Token &T, + const char *TextBegin, + char Marker, StringRef EndName) { + VerbatimBlockEndCommandName.clear(); + 
VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@"); + VerbatimBlockEndCommandName.append(EndName); + + formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); + T.setVerbatimBlockName(StringRef(TextBegin - (T.getLength() - 1), + T.getLength() - 1)); + + State = LS_VerbatimBlockFirstLine; +} + +void Lexer::lexVerbatimBlockFirstLine(Token &T) { + assert(BufferPtr < CommentEnd); + + // FIXME: It would be better to scan the text once, finding either the block + // end command or newline. + // + // Extract current line. + const char *Newline = findNewline(BufferPtr, CommentEnd); + StringRef Line(BufferPtr, Newline - BufferPtr); + + // Look for end command in current line. + size_t Pos = Line.find(VerbatimBlockEndCommandName); + const char *NextLine; + if (Pos == StringRef::npos) { + // Current line is completely verbatim. + NextLine = skipNewline(Newline, CommentEnd); + } else if (Pos == 0) { + // Current line starts with the end command. + const char *End = BufferPtr + VerbatimBlockEndCommandName.size(); + formTokenWithChars(T, End, tok::verbatim_block_end); + T.setVerbatimBlockName(StringRef(End - (T.getLength() - 1), + T.getLength() - 1)); + State = LS_Normal; + return; + } else { + // There is some text, followed by end command. Extract text first. + NextLine = BufferPtr + Pos; + } + + formTokenWithChars(T, NextLine, tok::verbatim_block_line); + T.setVerbatimBlockText(StringRef(NextLine - T.getLength(), T.getLength())); + + State = LS_VerbatimBlockBody; +} + +/// Lex one body line of a verbatim block, first dropping the line-starting +/// decorations of a C comment. +void Lexer::lexVerbatimBlockBody(Token &T) { + assert(State == LS_VerbatimBlockBody); + + if (CommentState == LCS_InsideCComment) + skipLineStartingDecorations(); + + // Skipping decorations can land exactly on CommentEnd (for example when the + // last line of the comment is " **/"). Emit an empty body line instead of + // violating the non-empty precondition of lexVerbatimBlockFirstLine. + if (BufferPtr == CommentEnd) { + formTokenWithChars(T, BufferPtr, tok::verbatim_block_line); + T.setVerbatimBlockText(""); + return; + } + + lexVerbatimBlockFirstLine(T); +} + +void Lexer::lexVerbatimLine(Token &T, const char *TextBegin) { + // Extract current line. 
+ const char *Newline = findNewline(BufferPtr, CommentEnd); + + const StringRef Name(BufferPtr + 1, TextBegin - BufferPtr - 1); + const StringRef Text(TextBegin, Newline - TextBegin); + + formTokenWithChars(T, Newline, tok::verbatim_line); + T.setVerbatimLineName(Name); + T.setVerbatimLineText(Text); +} + +void Lexer::setupAndLexHTMLOpenTag(Token &T) { + assert(BufferPtr[0] == '<' && isHTMLIdentifierCharacter(BufferPtr[1])); + const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd); + + formTokenWithChars(T, TagNameEnd, tok::html_tag_open); + T.setHTMLTagOpenName(StringRef(TagNameEnd - (T.getLength() - 1), + T.getLength() - 1)); + + BufferPtr = skipWhitespace(BufferPtr, CommentEnd); + + if (BufferPtr != CommentEnd && *BufferPtr == '>') { + BufferPtr++; + return; + } + + if (BufferPtr != CommentEnd && isHTMLIdentifierCharacter(*BufferPtr)) + State = LS_HTMLOpenTag; +} + +void Lexer::lexHTMLOpenTag(Token &T) { + assert(State == LS_HTMLOpenTag); + + const char *TokenPtr = BufferPtr; + char C = *TokenPtr; + if (isHTMLIdentifierCharacter(C)) { + TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd); + formTokenWithChars(T, TokenPtr, tok::html_ident); + T.setHTMLIdent(StringRef(TokenPtr - T.getLength(), T.getLength())); + } else { + switch (C) { + case '=': + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_equals); + break; + case '\"': + case '\'': { + const char *OpenQuote = TokenPtr; + TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd); + const char *ClosingQuote = TokenPtr; + if (TokenPtr != CommentEnd) // Skip closing quote. + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_quoted_string); + T.setHTMLQuotedString(StringRef(OpenQuote + 1, + ClosingQuote - (OpenQuote + 1))); + break; + } + case '>': + TokenPtr++; + formTokenWithChars(T, TokenPtr, tok::html_greater); + break; + } + } + + // Now look ahead and return to normal state if we don't see any HTML tokens + // ahead. 
+ BufferPtr = skipWhitespace(BufferPtr, CommentEnd); + if (BufferPtr == CommentEnd) { + State = LS_Normal; + return; + } + + C = *BufferPtr; + if (!isHTMLIdentifierCharacter(C) && + C != '=' && C != '\"' && C != '\'' && C != '>') { + State = LS_Normal; + return; + } +} + +void Lexer::lexHTMLCloseTag(Token &T) { + assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); + + const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); + const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); + + const char *End = skipWhitespace(TagNameEnd, CommentEnd); + if (End != CommentEnd && *End == '>') + End++; + + formTokenWithChars(T, End, tok::html_tag_close); + T.setHTMLTagCloseName(StringRef(TagNameBegin, TagNameEnd - TagNameBegin)); +} + +Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts, + const char *BufferStart, const char *BufferEnd): + BufferStart(BufferStart), BufferEnd(BufferEnd), + FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart), + CommentState(LCS_BeforeComment), State(LS_Normal) { +} + +void Lexer::lex(Token &T) { +again: + switch (CommentState) { + case LCS_BeforeComment: + if (BufferPtr == BufferEnd) { + formTokenWithChars(T, BufferPtr, tok::eof); + return; + } + + assert(*BufferPtr == '/'); + BufferPtr++; // Skip first slash. + switch(*BufferPtr) { + case '/': { // BCPL comment. + BufferPtr++; // Skip second slash. + + if (BufferPtr != BufferEnd) { + // Skip Doxygen magic marker, if it is present. + // It might be missing because of a typo //< or /*<, or because we + // merged this non-Doxygen comment into a bunch of Doxygen comments + // around it: /** ... */ /* ... */ /** ... */ + const char C = *BufferPtr; + if (C == '/' || C == '!') + BufferPtr++; + } + + // Skip less-than symbol that marks trailing comments. + // Skip it even if the comment is not a Doxygen one, because //< and /*< + // are frequent typos. 
+ if (BufferPtr != BufferEnd && *BufferPtr == '<') + BufferPtr++; + + CommentState = LCS_InsideBCPLComment; + State = LS_Normal; + CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); + goto again; + } + case '*': { // C comment. + BufferPtr++; // Skip star. + + // Skip Doxygen magic marker. + const char C = *BufferPtr; + if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!') + BufferPtr++; + + // Skip less-than symbol that marks trailing comments. + if (BufferPtr != BufferEnd && *BufferPtr == '<') + BufferPtr++; + + CommentState = LCS_InsideCComment; + State = LS_Normal; + CommentEnd = findCCommentEnd(BufferPtr, BufferEnd); + goto again; + } + default: + llvm_unreachable("second character of comment should be '/' or '*'"); + } + + case LCS_BetweenComments: { + // Consecutive comments are extracted only if there is only whitespace + // between them. So we can search for the start of the next comment. + const char *EndWhitespace = BufferPtr; + while(EndWhitespace != BufferEnd && *EndWhitespace != '/') + EndWhitespace++; + + // Turn any whitespace between comments (and there is only whitespace + // between them) into a newline. We have two newlines between comments + // in total (first one was synthesized after a comment). + formTokenWithChars(T, EndWhitespace, tok::newline); + + CommentState = LCS_BeforeComment; + break; + } + + case LCS_InsideBCPLComment: + case LCS_InsideCComment: + if (BufferPtr != CommentEnd) { + lexCommentText(T); + break; + } else { + // Skip C comment closing sequence. + if (CommentState == LCS_InsideCComment) { + assert(BufferPtr[0] == '*' && BufferPtr[1] == '/'); + BufferPtr += 2; + assert(BufferPtr <= BufferEnd); + + // Synthesize newline just after the C comment, regardless if there is + // actually a newline. + formTokenWithChars(T, BufferPtr, tok::newline); + + CommentState = LCS_BetweenComments; + break; + } else { + // Don't synthesize a newline after BCPL comment. 
+ CommentState = LCS_BetweenComments; + goto again; + } + } + } +} + +StringRef Lexer::getSpelling(const Token &Tok, + const SourceManager &SourceMgr, + bool *Invalid) const { + SourceLocation Loc = Tok.getLocation(); + std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc); + + bool InvalidTemp = false; + StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp); + if (InvalidTemp) { + if (Invalid) *Invalid = true; // Out-param may be null; don't deref it. + return StringRef(); + } + + const char *Begin = File.data() + LocInfo.second; + return StringRef(Begin, Tok.getLength()); +} + +void Lexer::addVerbatimBlockCommand(StringRef BeginName, StringRef EndName) { + VerbatimBlockCommand VBC; + VBC.BeginName = BeginName; + VBC.EndName = EndName; + VerbatimBlockCommands.push_back(VBC); +} + +void Lexer::addVerbatimLineCommand(StringRef Name) { + VerbatimLineCommand VLC; + VLC.Name = Name; + VerbatimLineCommands.push_back(VLC); +} + +} // end namespace comments +} // end namespace clang + diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp index 438fdcd24c..ede47664d6 100644 --- a/lib/AST/RawCommentList.cpp +++ b/lib/AST/RawCommentList.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "clang/AST/RawCommentList.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/CommentLexer.h" +#include "clang/AST/CommentBriefParser.h" #include "llvm/ADT/STLExtras.h" using namespace clang; @@ -126,6 +129,24 @@ StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const { return StringRef(BufferStart + BeginOffset, Length); } +StringRef RawComment::extractBriefText(const ASTContext &Context) const { + // Make sure that RawText is valid. 
+ getRawText(Context.getSourceManager()); + + comments::Lexer L(Range.getBegin(), comments::CommentOptions(), + RawText.begin(), RawText.end()); + comments::BriefParser P(L); + + const std::string Result = P.Parse(); + const unsigned BriefTextLength = Result.size(); + char *BriefTextPtr = new (Context) char[BriefTextLength + 1]; + memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1); + BriefText = StringRef(BriefTextPtr, BriefTextLength); + BriefTextValid = true; + + return BriefText; +} + namespace { bool containsOnlyWhitespace(StringRef Str) { return Str.find_first_not_of(" \t\f\v\r\n") == StringRef::npos; diff --git a/test/Index/annotate-comments.cpp b/test/Index/annotate-comments.cpp index 634f481139..64d1c983bc 100644 --- a/test/Index/annotate-comments.cpp +++ b/test/Index/annotate-comments.cpp @@ -163,6 +163,37 @@ class test42 { int isdoxy42; /* NOT_DOXYGEN */ ///< isdoxy42 IS_DOXYGEN_SINGLE }; +/// IS_DOXYGEN_START +/// It is fine to have a command at the end of comment. +///\brief +/// +/// Some malformed command. +/* \*/ +/** + * \brief Aaa aaaaaaa aaaa. + * IS_DOXYGEN_END + */ +void isdoxy43(void); + +/// IS_DOXYGEN_START Aaa bbb +/// ccc. +/// +/// Ddd eee. +/// Fff. +/// +/// Ggg. IS_DOXYGEN_END +void isdoxy44(void); + +/// IS_DOXYGEN_START Aaa bbb +/// ccc. +/// +/// \brief +/// Ddd eee. +/// Fff. +/// +/// Ggg. IS_DOXYGEN_END +void isdoxy45(void); + #endif // RUN: rm -rf %t @@ -187,8 +218,8 @@ class test42 { // WRONG-NOT: IS_DOXYGEN_NOT_ATTACHED // Ensure we don't pick up extra comments. 
-// WRONG-NOT: IS_DOXYGEN_START{{.*}}IS_DOXYGEN_START -// WRONG-NOT: IS_DOXYGEN_END{{.*}}IS_DOXYGEN_END +// WRONG-NOT: IS_DOXYGEN_START{{.*}}IS_DOXYGEN_START{{.*}}BriefComment= +// WRONG-NOT: IS_DOXYGEN_END{{.*}}IS_DOXYGEN_END{{.*}}BriefComment= // RUN: FileCheck %s < %t/out.c-index-direct // RUN: FileCheck %s < %t/out.c-index-pch @@ -226,4 +257,8 @@ class test42 { // CHECK: annotate-comments.cpp:155:6: FunctionDecl=isdoxy40:{{.*}} isdoxy40 IS_DOXYGEN_SINGLE // CHECK: annotate-comments.cpp:160:5: FunctionDecl=isdoxy41:{{.*}} isdoxy41 IS_DOXYGEN_SINGLE // CHECK: annotate-comments.cpp:163:7: FieldDecl=isdoxy42:{{.*}} isdoxy42 IS_DOXYGEN_SINGLE +// CHECK: annotate-comments.cpp:176:6: FunctionDecl=isdoxy43:{{.*}} IS_DOXYGEN_START{{.*}} IS_DOXYGEN_END + +// CHECK: annotate-comments.cpp:185:6: FunctionDecl=isdoxy44:{{.*}} BriefComment=[ IS_DOXYGEN_START Aaa bbb\n ccc.\n] +// CHECK: annotate-comments.cpp:195:6: FunctionDecl=isdoxy45:{{.*}} BriefComment=[\n Ddd eee.\n Fff.\n] diff --git a/tools/c-index-test/c-index-test.c b/tools/c-index-test/c-index-test.c index 4c9723da01..73e3e589ce 100644 --- a/tools/c-index-test/c-index-test.c +++ b/tools/c-index-test/c-index-test.c @@ -162,6 +162,24 @@ int parse_remapped_files(int argc, const char **argv, int start_arg, /* Pretty-printing. 
*/ /******************************************************************************/ +static void PrintCString(const char *Prefix, const char *CStr) { + printf(" %s=[", Prefix); + if (CStr != NULL && CStr[0] != '\0') { + for ( ; *CStr; ++CStr) { + const char C = *CStr; + switch (C) { + case '\n': printf("\\n"); break; + case '\r': printf("\\r"); break; + case '\t': printf("\\t"); break; + case '\v': printf("\\v"); break; + case '\f': printf("\\f"); break; + default: putchar(C); break; + } + } + } + printf("]"); +} + static void PrintRange(CXSourceRange R, const char *str) { CXFile begin_file, end_file; unsigned begin_line, begin_column, end_line, end_column; @@ -218,8 +236,10 @@ static void PrintCursor(CXCursor Cursor) { CXPlatformAvailability PlatformAvailability[2]; int NumPlatformAvailability; int I; - CXString Comment; - const char *CommentCString; + CXString RawComment; + const char *RawCommentCString; + CXString BriefComment; + const char *BriefCommentCString; ks = clang_getCursorKindSpelling(Cursor.kind); string = want_display_name? 
clang_getCursorDisplayName(Cursor) @@ -401,21 +421,19 @@ static void PrintCursor(CXCursor Cursor) { PrintRange(RefNameRange, "RefName"); } - Comment = clang_Cursor_getRawCommentText(Cursor); - CommentCString = clang_getCString(Comment); - if (CommentCString != NULL && CommentCString[0] != '\0') { - printf(" Comment=["); - for ( ; *CommentCString; ++CommentCString) { - if (*CommentCString != '\n') - putchar(*CommentCString); - else - printf("\\n"); - } - printf("]"); - - PrintRange(clang_Cursor_getCommentRange(Cursor), "CommentRange"); + RawComment = clang_Cursor_getRawCommentText(Cursor); + RawCommentCString = clang_getCString(RawComment); + if (RawCommentCString != NULL && RawCommentCString[0] != '\0') { + PrintCString("RawComment", RawCommentCString); + PrintRange(clang_Cursor_getCommentRange(Cursor), "RawCommentRange"); + + BriefComment = clang_Cursor_getBriefCommentText(Cursor); + BriefCommentCString = clang_getCString(BriefComment); + if (BriefCommentCString != NULL && BriefCommentCString[0] != '\0') + PrintCString("BriefComment", BriefCommentCString); + clang_disposeString(BriefComment); } - clang_disposeString(Comment); + clang_disposeString(RawComment); } } diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index df8adb419e..250e9e7b2d 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -5707,6 +5707,24 @@ CXString clang_Cursor_getRawCommentText(CXCursor C) { } // end: extern "C" +CXString clang_Cursor_getBriefCommentText(CXCursor C) { + if (!clang_isDeclaration(C.kind)) + return createCXString((const char *) NULL); + + const Decl *D = getCursorDecl(C); + const ASTContext &Context = getCursorContext(C); + const RawComment *RC = Context.getRawCommentForDecl(D); + + if (RC && RC->isDocumentation()) { + StringRef BriefText = RC->getBriefText(Context); + + // Don't duplicate the string because RawComment ensures that this memory + // will not go away. 
+ return createCXString(BriefText, false); + } + + return createCXString((const char *) NULL); +} //===----------------------------------------------------------------------===// // C++ AST instrospection. diff --git a/tools/libclang/libclang.exports b/tools/libclang/libclang.exports index d24960b297..c28b3b4083 100644 --- a/tools/libclang/libclang.exports +++ b/tools/libclang/libclang.exports @@ -5,6 +5,7 @@ clang_CXIndex_setGlobalOptions clang_CXXMethod_isStatic clang_CXXMethod_isVirtual clang_Cursor_getArgument +clang_Cursor_getBriefCommentText clang_Cursor_getCommentRange clang_Cursor_getRawCommentText clang_Cursor_getNumArguments diff --git a/unittests/AST/CMakeLists.txt b/unittests/AST/CMakeLists.txt new file mode 100644 index 0000000000..738e760419 --- /dev/null +++ b/unittests/AST/CMakeLists.txt @@ -0,0 +1,7 @@ +add_clang_unittest(ASTTests + CommentLexer.cpp + ) + +target_link_libraries(ASTTests + clangAST + ) diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp new file mode 100644 index 0000000000..b73d16ca00 --- /dev/null +++ b/unittests/AST/CommentLexer.cpp @@ -0,0 +1,1010 @@ +//===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceManager.h" +#include "clang/Basic/FileManager.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/AST/CommentLexer.h" +#include "llvm/ADT/STLExtras.h" +#include + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace clang { +namespace comments { + +namespace { +class CommentLexerTest : public ::testing::Test { +protected: + CommentLexerTest() + : FileMgr(FileMgrOpts), + DiagID(new DiagnosticIDs()), + Diags(DiagID, new IgnoringDiagConsumer()), + SourceMgr(Diags, FileMgr) { + } + + FileSystemOptions FileMgrOpts; + FileManager FileMgr; + IntrusiveRefCntPtr DiagID; + DiagnosticsEngine Diags; + SourceManager SourceMgr; + + void lexString(const char *Source, std::vector &Toks); +}; + +void CommentLexerTest::lexString(const char *Source, + std::vector &Toks) { + MemoryBuffer *Buf = MemoryBuffer::getMemBuffer(Source); + FileID File = SourceMgr.createFileIDForMemBuffer(Buf); + SourceLocation Begin = SourceMgr.getLocForStartOfFile(File); + + comments::Lexer L(Begin, CommentOptions(), + Source, Source + strlen(Source)); + + while (1) { + Token Tok; + L.lex(Tok); + if (Tok.is(tok::eof)) + break; + Toks.push_back(Tok); + } +} + +} // unnamed namespace + +// Empty source range should be handled. +TEST_F(CommentLexerTest, Basic1) { + const char *Source = ""; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(0U, Toks.size()); +} + +// Empty comments should be handled. +TEST_F(CommentLexerTest, Basic2) { + const char *Sources[] = { + "//", "///", "//!", "///<", "//!<" + }; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(1U, Toks.size()); + + ASSERT_EQ(tok::newline, Toks[0].getKind()); + } +} + +// Empty comments should be handled. 
+TEST_F(CommentLexerTest, Basic3) { + const char *Sources[] = { + "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/" + }; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(2U, Toks.size()); + + ASSERT_EQ(tok::newline, Toks[0].getKind()); + ASSERT_EQ(tok::newline, Toks[1].getKind()); + } +} + +// Single comment with plain text. +TEST_F(CommentLexerTest, Basic4) { + const char *Sources[] = { + "// Meow", "/// Meow", "//! Meow", + "// Meow\n", "// Meow\r\n", "//! Meow\r", + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(2U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); + + ASSERT_EQ(tok::newline, Toks[1].getKind()); + } +} + +// Single comment with plain text. +TEST_F(CommentLexerTest, Basic5) { + const char *Sources[] = { + "/* Meow*/", "/** Meow*/", "/*! Meow*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(3U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Meow"), Toks[0].getText()); + + ASSERT_EQ(tok::newline, Toks[1].getKind()); + ASSERT_EQ(tok::newline, Toks[2].getKind()); + } +} + +// Test newline escaping. +TEST_F(CommentLexerTest, Basic6) { + const char *Sources[] = { + "// Aaa\\\n" " Bbb\\ \n" " Ccc?" "?/\n", + "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n", + "// Aaa\\\r" " Bbb\\ \r" " Ccc?" 
"?/\r" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(10U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); + ASSERT_EQ(tok::text, Toks[1].getKind()); + ASSERT_EQ(StringRef("\\"), Toks[1].getText()); + ASSERT_EQ(tok::newline, Toks[2].getKind()); + + ASSERT_EQ(tok::text, Toks[3].getKind()); + ASSERT_EQ(StringRef(" Bbb"), Toks[3].getText()); + ASSERT_EQ(tok::text, Toks[4].getKind()); + ASSERT_EQ(StringRef("\\"), Toks[4].getText()); + ASSERT_EQ(tok::text, Toks[5].getKind()); + ASSERT_EQ(StringRef(" "), Toks[5].getText()); + ASSERT_EQ(tok::newline, Toks[6].getKind()); + + ASSERT_EQ(tok::text, Toks[7].getKind()); + ASSERT_EQ(StringRef(" Ccc?" "?/"), Toks[7].getText()); + ASSERT_EQ(tok::newline, Toks[8].getKind()); + + ASSERT_EQ(tok::newline, Toks[9].getKind()); + } +} + +// Check that we skip C-style aligned stars correctly. +TEST_F(CommentLexerTest, Basic7) { + const char *Source = + "/* Aaa\n" + " * Bbb\r\n" + "\t* Ccc\n" + " ! Ddd\n" + " * Eee\n" + " ** Fff\n" + " */"; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(15U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText()); + ASSERT_EQ(tok::newline, Toks[1].getKind()); + + ASSERT_EQ(tok::text, Toks[2].getKind()); + ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); + + ASSERT_EQ(tok::text, Toks[4].getKind()); + ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText()); + ASSERT_EQ(tok::newline, Toks[5].getKind()); + + ASSERT_EQ(tok::text, Toks[6].getKind()); + ASSERT_EQ(StringRef(" ! 
Ddd"), Toks[6].getText()); + ASSERT_EQ(tok::newline, Toks[7].getKind()); + + ASSERT_EQ(tok::text, Toks[8].getKind()); + ASSERT_EQ(StringRef(" Eee"), Toks[8].getText()); + ASSERT_EQ(tok::newline, Toks[9].getKind()); + + ASSERT_EQ(tok::text, Toks[10].getKind()); + ASSERT_EQ(StringRef("* Fff"), Toks[10].getText()); + ASSERT_EQ(tok::newline, Toks[11].getKind()); + + ASSERT_EQ(tok::text, Toks[12].getKind()); + ASSERT_EQ(StringRef(" "), Toks[12].getText()); + + ASSERT_EQ(tok::newline, Toks[13].getKind()); + ASSERT_EQ(tok::newline, Toks[14].getKind()); +} + +// A command marker followed by comment end. +TEST_F(CommentLexerTest, DoxygenCommand1) { + const char *Sources[] = { "//@", "///@", "//!@" }; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(2U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef("@"), Toks[0].getText()); + + ASSERT_EQ(tok::newline, Toks[1].getKind()); + } +} + +// A command marker followed by comment end. +TEST_F(CommentLexerTest, DoxygenCommand2) { + const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"}; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(3U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef("@"), Toks[0].getText()); + + ASSERT_EQ(tok::newline, Toks[1].getKind()); + ASSERT_EQ(tok::newline, Toks[2].getKind()); + } +} + +// A command marker followed by comment end. 
+TEST_F(CommentLexerTest, DoxygenCommand3) { + const char *Sources[] = { "/*\\*/", "/**\\*/" }; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(3U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef("\\"), Toks[0].getText()); + + ASSERT_EQ(tok::newline, Toks[1].getKind()); + ASSERT_EQ(tok::newline, Toks[2].getKind()); + } +} + +// Doxygen escape sequences. +TEST_F(CommentLexerTest, DoxygenCommand4) { + const char *Source = + "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::"; + const char *Text[] = { + " ", + "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ", + "<", " ", ">", " ", "%", " ", "\"", " ", ".", " ", + "::", "" + }; + + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(array_lengthof(Text), Toks.size()); + + for (size_t i = 0, e = Toks.size(); i != e; i++) { + if(Toks[i].is(tok::text)) + ASSERT_EQ(StringRef(Text[i]), Toks[i].getText()) + << "index " << i; + } +} + +TEST_F(CommentLexerTest, DoxygenCommand5) { + const char *Source = "/// \\brief Aaa."; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(4U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::command, Toks[1].getKind()); + ASSERT_EQ(StringRef("brief"), Toks[1].getCommandName()); + + ASSERT_EQ(tok::text, Toks[2].getKind()); + ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText()); + + ASSERT_EQ(tok::newline, Toks[3].getKind()); +} + +TEST_F(CommentLexerTest, DoxygenCommand6) { + const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n"; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(8U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::command, Toks[1].getKind()); + ASSERT_EQ(StringRef("aaa"), Toks[1].getCommandName()); + + ASSERT_EQ(tok::command, Toks[2].getKind()); + ASSERT_EQ(StringRef("bbb"), 
Toks[2].getCommandName()); + + ASSERT_EQ(tok::text, Toks[3].getKind()); + ASSERT_EQ(StringRef(" "), Toks[3].getText()); + + ASSERT_EQ(tok::command, Toks[4].getKind()); + ASSERT_EQ(StringRef("ccc"), Toks[4].getCommandName()); + + ASSERT_EQ(tok::text, Toks[5].getKind()); + ASSERT_EQ(StringRef("\t"), Toks[5].getText()); + + ASSERT_EQ(tok::command, Toks[6].getKind()); + ASSERT_EQ(StringRef("ddd"), Toks[6].getCommandName()); + + ASSERT_EQ(tok::newline, Toks[7].getKind()); +} + +// Empty verbatim block. +TEST_F(CommentLexerTest, VerbatimBlock1) { + const char *Sources[] = { + "/// \\verbatim\\endverbatim\n//", + "/** \\verbatim\\endverbatim*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(5U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[2].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[2].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[3].getKind()); + ASSERT_EQ(tok::newline, Toks[4].getKind()); + } +} + +// Empty verbatim block without an end command. +TEST_F(CommentLexerTest, VerbatimBlock2) { + const char *Sources[] = { + "/// \\verbatim\n//", + "/** \\verbatim*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(4U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); + } +} + +// Single-line verbatim block. 
+TEST_F(CommentLexerTest, VerbatimBlock3) { + const char *Sources[] = { + "/// Meow \\verbatim aaa \\endverbatim\n//", + "/** Meow \\verbatim aaa \\endverbatim*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(6U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); + ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[3].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[4].getKind()); + ASSERT_EQ(tok::newline, Toks[5].getKind()); + } +} + +// Single-line verbatim block without an end command. +TEST_F(CommentLexerTest, VerbatimBlock4) { + const char *Sources[] = { + "/// Meow \\verbatim aaa \n//", + "/** Meow \\verbatim aaa */" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(5U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); + ASSERT_EQ(StringRef(" aaa "), Toks[2].getVerbatimBlockText()); + + ASSERT_EQ(tok::newline, Toks[3].getKind()); + ASSERT_EQ(tok::newline, Toks[4].getKind()); + } +} + +// Complex test for verbatim blocks. 
+TEST_F(CommentLexerTest, VerbatimBlock5) { + const char *Source = + "/* Meow \\verbatim aaa\\$\\@\n" + "bbb \\endverbati\r" + "ccc\r\n" + "ddd \\endverbatim Blah \\verbatim eee\n" + "\\endverbatim BlahBlah*/"; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(14U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" Meow "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); + ASSERT_EQ(StringRef(" aaa\\$\\@\n"), Toks[2].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[3].getKind()); + ASSERT_EQ(StringRef("bbb \\endverbati\r"), Toks[3].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[4].getKind()); + ASSERT_EQ(StringRef("ccc\r\n"), Toks[4].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[5].getKind()); + ASSERT_EQ(StringRef("ddd "), Toks[5].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[6].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[6].getVerbatimBlockName()); + + ASSERT_EQ(tok::text, Toks[7].getKind()); + ASSERT_EQ(StringRef(" Blah "), Toks[7].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[8].getKind()); + ASSERT_EQ(StringRef("verbatim"), Toks[8].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[9].getKind()); + ASSERT_EQ(StringRef(" eee\n"), Toks[9].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[10].getKind()); + ASSERT_EQ(StringRef("endverbatim"), Toks[10].getVerbatimBlockName()); + + ASSERT_EQ(tok::text, Toks[11].getKind()); + ASSERT_EQ(StringRef(" BlahBlah"), Toks[11].getText()); + + ASSERT_EQ(tok::newline, Toks[12].getKind()); + ASSERT_EQ(tok::newline, Toks[13].getKind()); +} + +// LaTeX verbatim blocks. 
+TEST_F(CommentLexerTest, VerbatimBlock6) { + const char *Source = + "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f}"; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(13U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind()); + ASSERT_EQ(StringRef("f$"), Toks[1].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[2].getKind()); + ASSERT_EQ(StringRef(" Aaa "), Toks[2].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[3].getKind()); + ASSERT_EQ(StringRef("f$"), Toks[3].getVerbatimBlockName()); + + ASSERT_EQ(tok::text, Toks[4].getKind()); + ASSERT_EQ(StringRef(" "), Toks[4].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind()); + ASSERT_EQ(StringRef("f["), Toks[5].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[6].getKind()); + ASSERT_EQ(StringRef(" Bbb "), Toks[6].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[7].getKind()); + ASSERT_EQ(StringRef("f]"), Toks[7].getVerbatimBlockName()); + + ASSERT_EQ(tok::text, Toks[8].getKind()); + ASSERT_EQ(StringRef(" "), Toks[8].getText()); + + ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind()); + ASSERT_EQ(StringRef("f{"), Toks[9].getVerbatimBlockName()); + + ASSERT_EQ(tok::verbatim_block_line, Toks[10].getKind()); + ASSERT_EQ(StringRef(" Ccc "), Toks[10].getVerbatimBlockText()); + + ASSERT_EQ(tok::verbatim_block_end, Toks[11].getKind()); + ASSERT_EQ(StringRef("f}"), Toks[11].getVerbatimBlockName()); + + ASSERT_EQ(tok::newline, Toks[12].getKind()); +} + +// Empty verbatim line. 
+TEST_F(CommentLexerTest, VerbatimLine1) { + const char *Sources[] = { + "/// \\fn\n//", + "/** \\fn*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(4U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_line, Toks[1].getKind()); + ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName()); + ASSERT_EQ(StringRef(""), Toks[1].getVerbatimLineText()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); + } +} + +// Verbatim line with Doxygen escape sequences, which should not be expanded. +TEST_F(CommentLexerTest, VerbatimLine2) { + const char *Sources[] = { + "/// \\fn void *foo(const char *zzz = \"\\$\");\n//", + "/** \\fn void *foo(const char *zzz = \"\\$\");*/" + }; + + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; + + lexString(Sources[i], Toks); + + ASSERT_EQ(4U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::verbatim_line, Toks[1].getKind()); + ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName()); + ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"), + Toks[1].getVerbatimLineText()); + + ASSERT_EQ(tok::newline, Toks[2].getKind()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); + } +} + +// Verbatim line should not eat anything from next source line. 
+// NOTE(review): this stretch of the patch had lost every "<...>" span (template
+// arguments, HTML-looking input literals and the loop headers that followed
+// them). std::vector<Token> and the inputs of HTML2-HTML10 were rebuilt from
+// the token sequences the tests assert. HTML5's checks and HTML6's inputs are
+// inferred from the HTML4/HTML7 neighbours. Confirm against the upstream file.
+
+// The remainder of the \fn line, including the quoted "\$", is expected as the
+// text of a single verbatim_line token named "fn".
+TEST_F(CommentLexerTest, VerbatimLine3) {
+  const char *Source =
+    "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
+    " * Meow\n"
+    " */";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(8U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::verbatim_line, Toks[1].getKind());
+  ASSERT_EQ(StringRef("fn"), Toks[1].getVerbatimLineName());
+  ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
+            Toks[1].getVerbatimLineText());
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+
+  ASSERT_EQ(tok::text, Toks[3].getKind());
+  ASSERT_EQ(StringRef(" Meow"), Toks[3].getText());
+  ASSERT_EQ(tok::newline, Toks[4].getKind());
+
+  ASSERT_EQ(tok::text, Toks[5].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[5].getText());
+
+  ASSERT_EQ(tok::newline, Toks[6].getKind());
+  ASSERT_EQ(tok::newline, Toks[7].getKind());
+}
+
+// A lone '<' is expected to come back as an ordinary text token.
+TEST_F(CommentLexerTest, HTML1) {
+  const char *Source =
+    "// <";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::text, Toks[1].getKind());
+  ASSERT_EQ(StringRef("<"), Toks[1].getText());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+// An open tag that stops after its name is expected as html_tag_open "tag".
+TEST_F(CommentLexerTest, HTML2) {
+  const char *Sources[] = {
+    "// <tag",
+    "// <tag "
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(3U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::newline, Toks[2].getKind());
+  }
+}
+
+// '=' right after the tag name (no attribute name) is expected as plain text.
+TEST_F(CommentLexerTest, HTML3) {
+  const char *Source = "// <tag=";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+  ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("="), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+// A bare attribute name after the tag name is expected as html_ident "attr".
+TEST_F(CommentLexerTest, HTML4) {
+  const char *Sources[] = {
+    "// <tag attr",
+    "// <tag attr "
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(4U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+    ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+    ASSERT_EQ(tok::newline, Toks[3].getKind());
+  }
+}
+
+// Attribute name followed by '=' and nothing else.
+// NOTE(review): the checks of this test were lost; they are the HTML7 sequence
+// without its trailing "@" text token. Confirm against upstream.
+TEST_F(CommentLexerTest, HTML5) {
+  const char *Sources[] = {
+    "// <tag attr=",
+    "// <tag attr ="
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(5U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+    ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+    ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+    ASSERT_EQ(tok::newline, Toks[4].getKind());
+  }
+}
+
+// An opening quote with nothing after it is expected as an empty
+// html_quoted_string.
+// NOTE(review): test name and inputs were lost (the numbering jumped from
+// HTML5 to HTML7); only the checks below survived. Confirm against upstream.
+TEST_F(CommentLexerTest, HTML6) {
+  const char *Sources[] = {
+    "// <tag attr=\"",
+    "// <tag attr = \"",
+    "// <tag attr=\'",
+    "// <tag attr = \'"
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(6U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+    ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+    ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
+    ASSERT_EQ(StringRef(""), Toks[4].getHTMLQuotedString());
+
+    ASSERT_EQ(tok::newline, Toks[5].getKind());
+  }
+}
+
+// A character that cannot start an attribute value ('@') after '=' is expected
+// as plain text.
+TEST_F(CommentLexerTest, HTML7) {
+  const char *Source = "// <tag attr=@";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(6U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+  ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+  ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+  ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+  ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+  ASSERT_EQ(tok::text, Toks[4].getKind());
+  ASSERT_EQ(StringRef("@"), Toks[4].getText());
+
+  ASSERT_EQ(tok::newline, Toks[5].getKind());
+}
+
+// Quoted attribute values keep backslash-escaped quotes verbatim, whether or
+// not the closing quote is present.
+TEST_F(CommentLexerTest, HTML8) {
+  const char *Sources[] = {
+    "// <tag attr=\"val\\\"\\'val",
+    "// <tag attr=\"val\\\"\\'val\"",
+    "// <tag attr=\'val\\\"\\'val",
+    "// <tag attr=\'val\\\"\\'val\'"
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(6U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+    ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+    ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
+    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
+
+    ASSERT_EQ(tok::newline, Toks[5].getKind());
+  }
+}
+
+// Same values as HTML8, now followed by '>', which is expected as its own
+// html_greater token.
+TEST_F(CommentLexerTest, HTML9) {
+  const char *Sources[] = {
+    "// <tag attr=\"val\\\"\\'val\">",
+    "// <tag attr=\'val\\\"\\'val\'>"
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(7U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_open, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagOpenName());
+
+    ASSERT_EQ(tok::html_ident, Toks[2].getKind());
+    ASSERT_EQ(StringRef("attr"), Toks[2].getHTMLIdent());
+
+    ASSERT_EQ(tok::html_equals, Toks[3].getKind());
+
+    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
+    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());
+
+    ASSERT_EQ(tok::html_greater, Toks[5].getKind());
+
+    ASSERT_EQ(tok::newline, Toks[6].getKind());
+  }
+}
+
+// "</" with no name is expected as html_tag_close with an empty name.
+TEST_F(CommentLexerTest, HTML10) {
+  const char *Source = "// </";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(3U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
+  ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagCloseName());
+
+  ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+
+// NOTE(review): this stretch of the patch had lost every "<...>" span.
+// std::vector<Token> and the inputs of HTML11/HTML12 were rebuilt from the
+// token sequences the tests assert; the first HTML12 input (no closing '>') is
+// inferred. Confirm against the upstream file.
+
+// "</" followed by a non-name character: an empty-named html_tag_close, then
+// the character as plain text.
+TEST_F(CommentLexerTest, HTML11) {
+  const char *Source = "// </@";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(4U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+  ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
+  ASSERT_EQ(StringRef(""), Toks[1].getHTMLTagCloseName());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef("@"), Toks[2].getText());
+
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+// Closing tags, with optional whitespace around the name, are expected as a
+// single html_tag_close "tag"; no separate token is expected for '>'.
+TEST_F(CommentLexerTest, HTML12) {
+  const char *Sources[] = {
+    "// </tag",
+    "// </tag>",
+    "// </ tag>",
+    "// </ tag >"
+  };
+
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+    std::vector<Token> Toks;
+
+    lexString(Sources[i], Toks);
+
+    ASSERT_EQ(3U, Toks.size());
+
+    ASSERT_EQ(tok::text, Toks[0].getKind());
+    ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+    ASSERT_EQ(tok::html_tag_close, Toks[1].getKind());
+    ASSERT_EQ(StringRef("tag"), Toks[1].getHTMLTagCloseName());
+
+    ASSERT_EQ(tok::newline, Toks[2].getKind());
+  }
+}
+
+// Several comments of different styles (//, ///, /* */, /** */) lexed from one
+// buffer: each line yields text + newline, and each block comment contributes
+// one extra trailing newline token.
+TEST_F(CommentLexerTest, MultipleComments) {
+  const char *Source =
+    "// Aaa\n"
+    "/// Bbb\n"
+    "/* Ccc\n"
+    " * Ddd*/\n"
+    "/** Eee*/";
+
+  std::vector<Token> Toks;
+
+  lexString(Source, Toks);
+
+  ASSERT_EQ(12U, Toks.size());
+
+  ASSERT_EQ(tok::text, Toks[0].getKind());
+  ASSERT_EQ(StringRef(" Aaa"), Toks[0].getText());
+  ASSERT_EQ(tok::newline, Toks[1].getKind());
+
+  ASSERT_EQ(tok::text, Toks[2].getKind());
+  ASSERT_EQ(StringRef(" Bbb"), Toks[2].getText());
+  ASSERT_EQ(tok::newline, Toks[3].getKind());
+
+  ASSERT_EQ(tok::text, Toks[4].getKind());
+  ASSERT_EQ(StringRef(" Ccc"), Toks[4].getText());
+  ASSERT_EQ(tok::newline, Toks[5].getKind());
+
+  ASSERT_EQ(tok::text, Toks[6].getKind());
+  ASSERT_EQ(StringRef(" Ddd"), Toks[6].getText());
+  ASSERT_EQ(tok::newline, Toks[7].getKind());
+  ASSERT_EQ(tok::newline, Toks[8].getKind());
+
+  ASSERT_EQ(tok::text, Toks[9].getKind());
+  ASSERT_EQ(StringRef(" Eee"), Toks[9].getText());
+
+  ASSERT_EQ(tok::newline, Toks[10].getKind());
+  ASSERT_EQ(tok::newline, Toks[11].getKind());
+}
+
+} // end namespace comments
+} // end namespace clang
+
diff --git a/unittests/AST/Makefile b/unittests/AST/Makefile
new file mode 100644
index 0000000000..b25243f0f7
--- /dev/null
+++ b/unittests/AST/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/AST/Makefile ------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+CLANG_LEVEL = ../..
+TESTNAME = AST
+LINK_COMPONENTS := support mc
+USEDLIBS = clangAST.a clangBasic.a
+
+include $(CLANG_LEVEL)/unittests/Makefile
diff --git a/unittests/Makefile b/unittests/Makefile
index 05449d8ccf..b9f3c3f63d 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -14,7 +14,7 @@ ifndef CLANG_LEVEL
 
 IS_UNITTEST_LEVEL := 1
 CLANG_LEVEL := ..
-PARALLEL_DIRS = Basic Frontend Lex Tooling
+PARALLEL_DIRS = Basic AST Frontend Lex Tooling
 
 endif # CLANG_LEVEL
 
-- 
2.40.0