From: Dmitri Gribenko Date: Mon, 4 Mar 2013 23:06:15 +0000 (+0000) Subject: Comment parsing: refactor handling of command markers in AST X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=808383d2d6d58a7c7db85f8c7618fb74d821309f;p=clang Comment parsing: refactor handling of command markers in AST * Use the term 'command marker', because the semantics of 'backslash' and 'at' commands are the same. (Talking about 'at commands' makes them look like a special entity.) * Sink the flag down into bitfields, reducing the size of AST nodes. * Change the flag into an enum for clarity. Boolean function parameters are not very clear. * Add unittests for new tok::at_command tokens. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@176461 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/AST/Comment.h b/include/clang/AST/Comment.h index 8c876794b6..c02a82f0fa 100644 --- a/include/clang/AST/Comment.h +++ b/include/clang/AST/Comment.h @@ -28,6 +28,26 @@ class TemplateParameterList; namespace comments { class FullComment; + +/// Describes the syntax that was used in a documentation command. +/// +/// Exact values of this enumeration are important because they used to select +/// parts of diagnostic messages. Audit diagnostics before changing or adding +/// a new value. +enum CommandMarkerKind { + /// Command started with a backslash character: + /// \code + /// \foo + /// \endcode + CMK_Backslash = 0, + + /// Command started with an 'at' character: + /// \code + /// @foo + /// \endcode + CMK_At = 1 +}; + /// Any part of the comment. /// Abstract class. class Comment { @@ -110,8 +130,12 @@ protected: unsigned : NumCommentBits; unsigned CommandID : 8; + + /// Describes the syntax that was used in a documentation command. + /// Contains values from CommandMarkerKind enum. + unsigned CommandMarker : 1; }; - enum { NumBlockCommandCommentBits = NumCommentBits + 8 }; + enum { NumBlockCommandCommentBits = NumCommentBits + 9 }; class ParamCommandCommentBitfields { friend class ParamCommandComment; @@ -570,30 +594,29 @@ protected: /// Paragraph argument. ParagraphComment *Paragraph; - - /// Header Doc command, if true - bool AtCommand; - + BlockCommandComment(CommentKind K, SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand) : + CommandMarkerKind CommandMarker) : BlockContentComment(K, LocBegin, LocEnd), - Paragraph(NULL), AtCommand(AtCommand) { + Paragraph(NULL) { setLocation(getCommandNameBeginLoc()); BlockCommandCommentBits.CommandID = CommandID; + BlockCommandCommentBits.CommandMarker = CommandMarker; } public: BlockCommandComment(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand) : + CommandMarkerKind CommandMarker) : BlockContentComment(BlockCommandCommentKind, LocBegin, LocEnd), - Paragraph(NULL), AtCommand(AtCommand) { + Paragraph(NULL) { setLocation(getCommandNameBeginLoc()); BlockCommandCommentBits.CommandID = CommandID; + BlockCommandCommentBits.CommandMarker = CommandMarker; } static bool classof(const Comment *C) { @@ -662,9 +685,10 @@ public: if (NewLocEnd.isValid()) setSourceRange(SourceRange(getLocStart(), NewLocEnd)); } - - bool getAtCommand() const LLVM_READONLY { - return AtCommand; + + CommandMarkerKind getCommandMarker() const LLVM_READONLY { + return static_cast( + BlockCommandCommentBits.CommandMarker); } }; @@ -680,9 +704,9 @@ public: ParamCommandComment(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand) : + CommandMarkerKind CommandMarker) : BlockCommandComment(ParamCommandCommentKind, LocBegin, LocEnd, - CommandID, AtCommand), + CommandID, CommandMarker), ParamIndex(InvalidParamIndex) { ParamCommandCommentBits.Direction = In; ParamCommandCommentBits.IsDirectionExplicit = false; @@ -763,9 +787,9 @@ public: TParamCommandComment(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand) : + CommandMarkerKind CommandMarker) : BlockCommandComment(TParamCommandCommentKind, LocBegin, LocEnd, CommandID, - AtCommand) + CommandMarker) { } static bool classof(const Comment *C) { @@ -846,7 +870,8 @@ public: SourceLocation LocEnd, unsigned CommandID) : BlockCommandComment(VerbatimBlockCommentKind, - LocBegin, LocEnd, CommandID, false) + LocBegin, LocEnd, CommandID, + CMK_At) // FIXME: improve source fidelity. { } static bool classof(const Comment *C) { @@ -899,7 +924,8 @@ public: StringRef Text) : BlockCommandComment(VerbatimLineCommentKind, LocBegin, LocEnd, - CommandID, false), + CommandID, + CMK_At), // FIXME: improve source fidelity. Text(Text), TextBegin(TextBegin) { } diff --git a/include/clang/AST/CommentLexer.h b/include/clang/AST/CommentLexer.h index a52b039c16..4179f45e80 100644 --- a/include/clang/AST/CommentLexer.h +++ b/include/clang/AST/CommentLexer.h @@ -34,9 +34,9 @@ enum TokenKind { eof, newline, text, - unknown_command, // Command that does not have an ID. - backslash_command, // \Command with an ID. - at_command, // @command with an ID. + unknown_command, // Command that does not have an ID. + backslash_command, // Command with an ID, that used backslash marker. + at_command, // Command with an ID, that used 'at' marker. verbatim_block_begin, verbatim_block_line, verbatim_block_end, diff --git a/include/clang/AST/CommentSema.h b/include/clang/AST/CommentSema.h index 18e3ee8bd9..6613feb3d7 100644 --- a/include/clang/AST/CommentSema.h +++ b/include/clang/AST/CommentSema.h @@ -97,7 +97,7 @@ public: BlockCommandComment *actOnBlockCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand); + CommandMarkerKind CommandMarker); void actOnBlockCommandArgs(BlockCommandComment *Command, ArrayRef Args); @@ -108,7 +108,7 @@ public: ParamCommandComment *actOnParamCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand); + CommandMarkerKind CommandMarker); void actOnParamCommandDirectionArg(ParamCommandComment *Command, SourceLocation ArgLocBegin, @@ -126,7 +126,7 @@ public: TParamCommandComment *actOnTParamCommandStart(SourceLocation LocBegin, SourceLocation LocEnd, unsigned CommandID, - bool AtCommand); + CommandMarkerKind CommandMarker); void actOnTParamCommandParamNameArg(TParamCommandComment *Command, SourceLocation ArgLocBegin, diff --git a/lib/AST/CommentLexer.cpp b/lib/AST/CommentLexer.cpp index 2018099417..1194520bf3 100644 --- a/lib/AST/CommentLexer.cpp +++ b/lib/AST/CommentLexer.cpp @@ -298,7 +298,11 @@ void Lexer::lexCommentText(Token &T) { switch(*TokenPtr) { case '\\': case '@': { - bool AtCommand = (*TokenPtr == '@'); + // Commands that start with a backslash and commands that start with + // 'at' have equivalent semantics. But we keep information about the + // exact syntax in AST for comments. + tok::TokenKind CommandKind = + (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; TokenPtr++; if (TokenPtr == CommentEnd) { formTextToken(T, TokenPtr); @@ -359,9 +363,7 @@ void Lexer::lexCommentText(Token &T) { setupAndLexVerbatimLine(T, TokenPtr, Info); return; } - formTokenWithChars(T, TokenPtr, - (AtCommand ? tok::at_command - : tok::backslash_command)); + formTokenWithChars(T, TokenPtr, CommandKind); T.setCommandID(Info->getID()); return; } diff --git a/lib/AST/CommentParser.cpp b/lib/AST/CommentParser.cpp index c6bd922461..09912c6188 100644 --- a/lib/AST/CommentParser.cpp +++ b/lib/AST/CommentParser.cpp @@ -308,23 +308,25 @@ BlockCommandComment *Parser::parseBlockCommand() { bool IsParam = false; bool IsTParam = false; const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); + CommandMarkerKind CommandMarker = + Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; if (Info->IsParamCommand) { IsParam = true; PC = S.actOnParamCommandStart(Tok.getLocation(), Tok.getEndLocation(), Tok.getCommandID(), - Tok.is(tok::at_command)); + CommandMarker); } else if (Info->IsTParamCommand) { IsTParam = true; TPC = S.actOnTParamCommandStart(Tok.getLocation(), Tok.getEndLocation(), Tok.getCommandID(), - Tok.is(tok::at_command)); + CommandMarker); } else { BC = S.actOnBlockCommandStart(Tok.getLocation(), Tok.getEndLocation(), Tok.getCommandID(), - Tok.is(tok::at_command)); + CommandMarker); } consumeToken(); diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp index dab48e05df..d959d19b91 100644 --- a/lib/AST/CommentSema.cpp +++ b/lib/AST/CommentSema.cpp @@ -47,11 +47,13 @@ ParagraphComment *Sema::actOnParagraphComment( return new (Allocator) ParagraphComment(Content); } -BlockCommandComment *Sema::actOnBlockCommandStart(SourceLocation LocBegin, - SourceLocation LocEnd, - unsigned CommandID, - bool AtCommand) { - return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID, AtCommand); +BlockCommandComment *Sema::actOnBlockCommandStart( + SourceLocation LocBegin, + SourceLocation LocEnd, + unsigned CommandID, + CommandMarkerKind CommandMarker) { + return new (Allocator) BlockCommandComment(LocBegin, LocEnd, CommandID, + CommandMarker); } void Sema::actOnBlockCommandArgs(BlockCommandComment *Command, @@ -68,17 +70,19 @@ void Sema::actOnBlockCommandFinish(BlockCommandComment *Command, checkDeprecatedCommand(Command); } -ParamCommandComment *Sema::actOnParamCommandStart(SourceLocation LocBegin, - SourceLocation LocEnd, - unsigned CommandID, - bool AtCommand) { +ParamCommandComment *Sema::actOnParamCommandStart( + SourceLocation LocBegin, + SourceLocation LocEnd, + unsigned CommandID, + CommandMarkerKind CommandMarker) { ParamCommandComment *Command = - new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand); + new (Allocator) ParamCommandComment(LocBegin, LocEnd, CommandID, + CommandMarker); if (!isFunctionDecl()) Diag(Command->getLocation(), diag::warn_doc_param_not_attached_to_a_function_decl) - << AtCommand + << CommandMarker << Command->getCommandNameRange(Traits); return Command; @@ -163,17 +167,19 @@ void Sema::actOnParamCommandFinish(ParamCommandComment *Command, checkBlockCommandEmptyParagraph(Command); } -TParamCommandComment *Sema::actOnTParamCommandStart(SourceLocation LocBegin, - SourceLocation LocEnd, - unsigned CommandID, - bool AtCommand) { +TParamCommandComment *Sema::actOnTParamCommandStart( + SourceLocation LocBegin, + SourceLocation LocEnd, + unsigned CommandID, + CommandMarkerKind CommandMarker) { TParamCommandComment *Command = - new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID, AtCommand); + new (Allocator) TParamCommandComment(LocBegin, LocEnd, CommandID, + CommandMarker); if (!isTemplateOrSpecialization()) Diag(Command->getLocation(), diag::warn_doc_tparam_not_attached_to_a_template_decl) - << AtCommand + << CommandMarker << Command->getCommandNameRange(Traits); return Command; @@ -437,7 +443,7 @@ void Sema::checkBlockCommandEmptyParagraph(BlockCommandComment *Command) { if (!DiagLoc.isValid()) DiagLoc = Command->getCommandNameRange(Traits).getEnd(); Diag(DiagLoc, diag::warn_doc_block_command_empty_paragraph) - << Command->getAtCommand() + << Command->getCommandMarker() << Command->getCommandName(Traits) << Command->getSourceRange(); } @@ -465,7 +471,7 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) { } Diag(Command->getLocation(), diag::warn_doc_returns_attached_to_a_void_function) - << Command->getAtCommand() + << Command->getCommandMarker() << Command->getCommandName(Traits) << DiagKind << Command->getSourceRange(); @@ -477,7 +483,7 @@ void Sema::checkReturnsCommand(const BlockCommandComment *Command) { Diag(Command->getLocation(), diag::warn_doc_returns_not_attached_to_a_function_decl) - << Command->getAtCommand() + << Command->getCommandMarker() << Command->getCommandName(Traits) << Command->getSourceRange(); } @@ -510,18 +516,18 @@ void Sema::checkBlockCommandDuplicate(const BlockCommandComment *Command) { StringRef CommandName = Command->getCommandName(Traits); StringRef PrevCommandName = PrevCommand->getCommandName(Traits); Diag(Command->getLocation(), diag::warn_doc_block_command_duplicate) - << Command->getAtCommand() + << Command->getCommandMarker() << CommandName << Command->getSourceRange(); if (CommandName == PrevCommandName) Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous) - << PrevCommand->getAtCommand() + << PrevCommand->getCommandMarker() << PrevCommandName << PrevCommand->getSourceRange(); else Diag(PrevCommand->getLocation(), diag::note_doc_block_command_previous_alias) - << PrevCommand->getAtCommand() + << PrevCommand->getCommandMarker() << PrevCommandName << CommandName; } diff --git a/unittests/AST/CommentLexer.cpp b/unittests/AST/CommentLexer.cpp index 46de945cca..507daf8391 100644 --- a/unittests/AST/CommentLexer.cpp +++ b/unittests/AST/CommentLexer.cpp @@ -301,8 +301,10 @@ TEST_F(CommentLexerTest, DoxygenCommand3) { // Doxygen escape sequences. TEST_F(CommentLexerTest, DoxygenCommand4) { - const char *Source = - "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::"; + const char *Sources[] = { + "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::", + "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::" + }; const char *Text[] = { " ", "\\", " ", "@", " ", "&", " ", "$", " ", "#", " ", @@ -310,16 +312,18 @@ TEST_F(CommentLexerTest, DoxygenCommand4) { "::", "" }; - std::vector Toks; + for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) { + std::vector Toks; - lexString(Source, Toks); + lexString(Sources[i], Toks); - ASSERT_EQ(array_lengthof(Text), Toks.size()); + ASSERT_EQ(array_lengthof(Text), Toks.size()); - for (size_t i = 0, e = Toks.size(); i != e; i++) { - if(Toks[i].is(tok::text)) - ASSERT_EQ(StringRef(Text[i]), Toks[i].getText()) - << "index " << i; + for (size_t j = 0, e = Toks.size(); j != e; j++) { + if(Toks[j].is(tok::text)) + ASSERT_EQ(StringRef(Text[j]), Toks[j].getText()) + << "index " << i; + } } } @@ -404,6 +408,38 @@ TEST_F(CommentLexerTest, DoxygenCommand7) { } TEST_F(CommentLexerTest, DoxygenCommand8) { + const char *Source = "/// @em@em @em\t@em\n"; + std::vector Toks; + + lexString(Source, Toks); + + ASSERT_EQ(8U, Toks.size()); + + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); + + ASSERT_EQ(tok::at_command, Toks[1].getKind()); + ASSERT_EQ(StringRef("em"), getCommandName(Toks[1])); + + ASSERT_EQ(tok::at_command, Toks[2].getKind()); + ASSERT_EQ(StringRef("em"), getCommandName(Toks[2])); + + ASSERT_EQ(tok::text, Toks[3].getKind()); + ASSERT_EQ(StringRef(" "), Toks[3].getText()); + + ASSERT_EQ(tok::at_command, Toks[4].getKind()); + ASSERT_EQ(StringRef("em"), getCommandName(Toks[4])); + + ASSERT_EQ(tok::text, Toks[5].getKind()); + ASSERT_EQ(StringRef("\t"), Toks[5].getText()); + + ASSERT_EQ(tok::at_command, Toks[6].getKind()); + ASSERT_EQ(StringRef("em"), getCommandName(Toks[6])); + + ASSERT_EQ(tok::newline, Toks[7].getKind()); +} + +TEST_F(CommentLexerTest, DoxygenCommand9) { const char *Source = "/// \\aaa\\bbb \\ccc\t\\ddd\n"; std::vector Toks; @@ -435,7 +471,7 @@ TEST_F(CommentLexerTest, DoxygenCommand8) { ASSERT_EQ(tok::newline, Toks[7].getKind()); } -TEST_F(CommentLexerTest, DoxygenCommand9) { +TEST_F(CommentLexerTest, DoxygenCommand10) { const char *Source = "// \\c\n"; std::vector Toks; @@ -453,7 +489,9 @@ TEST_F(CommentLexerTest, DoxygenCommand9) { } TEST_F(CommentLexerTest, RegisterCustomBlockCommand) { - const char *Source = "/// \\NewBlockCommand Aaa.\n"; + const char *Source = + "/// \\NewBlockCommand Aaa.\n" + "/// @NewBlockCommand Aaa.\n"; Traits.registerBlockCommand(StringRef("NewBlockCommand")); @@ -461,10 +499,10 @@ TEST_F(CommentLexerTest, RegisterCustomBlockCommand) { lexString(Source, Toks); - ASSERT_EQ(4U, Toks.size()); + ASSERT_EQ(8U, Toks.size()); - ASSERT_EQ(tok::text, Toks[0].getKind()); - ASSERT_EQ(StringRef(" "), Toks[0].getText()); + ASSERT_EQ(tok::text, Toks[0].getKind()); + ASSERT_EQ(StringRef(" "), Toks[0].getText()); ASSERT_EQ(tok::backslash_command, Toks[1].getKind()); ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[1])); @@ -472,7 +510,18 @@ TEST_F(CommentLexerTest, RegisterCustomBlockCommand) { ASSERT_EQ(tok::text, Toks[2].getKind()); ASSERT_EQ(StringRef(" Aaa."), Toks[2].getText()); - ASSERT_EQ(tok::newline, Toks[3].getKind()); + ASSERT_EQ(tok::newline, Toks[3].getKind()); + + ASSERT_EQ(tok::text, Toks[4].getKind()); + ASSERT_EQ(StringRef(" "), Toks[4].getText()); + + ASSERT_EQ(tok::at_command, Toks[5].getKind()); + ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Toks[5])); + + ASSERT_EQ(tok::text, Toks[6].getKind()); + ASSERT_EQ(StringRef(" Aaa."), Toks[6].getText()); + + ASSERT_EQ(tok::newline, Toks[7].getKind()); } TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {