From: James Dennett Date: Wed, 15 Jul 2015 19:13:39 +0000 (+0000) Subject: Allow any comment to be a trailing comment when -fparse-all-comments is on. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=563f230426d4c9248d4b2e6000453b7a54486ff7;p=clang Allow any comment to be a trailing comment when -fparse-all-comments is on. This helps with freeform documentation styles, where otherwise code like enum class E { E1, // D1 E2 // D2 }; would result in D1 being associated with E2. To properly associate E1 with D1 and E2 with D2, this patch allows all raw comments C such that C.isParseAllComments() to participate in trailing comment checks inside getRawCommentForDeclNoCache. This takes care of linking the intended documentation with the intended decls. There remains an issue with code like: foo(); // DN int x; To prevent DN from being associated with x, this patch adds a new test on preceding-line comments C (where C.isParseAllComments() and also C's kind is RCK_OrdinaryBCPL or RCK_OrdinaryC) that checks whether C is the first non-whitespace thing on C's starting line. Patch from Luke Zarko , D11069 reviewed by rsmith. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@242317 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/AST/RawCommentList.cpp b/lib/AST/RawCommentList.cpp index 24b129a5c5..8317f76b85 100644 --- a/lib/AST/RawCommentList.cpp +++ b/lib/AST/RawCommentList.cpp @@ -15,6 +15,7 @@ #include "clang/AST/CommentLexer.h" #include "clang/AST/CommentParser.h" #include "clang/AST/CommentSema.h" +#include "clang/Basic/CharInfo.h" #include "llvm/ADT/STLExtras.h" using namespace clang; @@ -62,12 +63,53 @@ std::pair getCommentKind(StringRef Comment, bool mergedCommentIsTrailingComment(StringRef Comment) { return (Comment.size() > 3) && (Comment[3] == '<'); } + +/// Returns true if R1 and R2 both have valid locations that start on the same +/// column. +bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1, + const RawComment &R2) { + SourceLocation L1 = R1.getLocStart(); + SourceLocation L2 = R2.getLocStart(); + bool Invalid = false; + unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid); + if (!Invalid) { + unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid); + return !Invalid && (C1 == C2); + } + return false; +} } // unnamed namespace +/// \brief Determines whether there is only whitespace in `Buffer` between `P` +/// and the previous line. +/// \param Buffer The buffer to search in. +/// \param P The offset from the beginning of `Buffer` to start from. +/// \return true if all of the characters in `Buffer` ranging from the closest +/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1` +/// are whitespace. +static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) { + // Search backwards until we see linefeed or carriage return. + for (unsigned I = P; I != 0; --I) { + char C = Buffer[I - 1]; + if (isVerticalWhitespace(C)) + return true; + if (!isHorizontalWhitespace(C)) + return false; + } + // We hit the beginning of the buffer. + return true; +} + +/// Returns whether `K` is an ordinary comment kind. +static bool isOrdinaryKind(RawComment::CommentKind K) { + return (K == RawComment::RCK_OrdinaryBCPL) || + (K == RawComment::RCK_OrdinaryC); +} + RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, bool Merged, bool ParseAllComments) : Range(SR), RawTextValid(false), BriefTextValid(false), - IsAttached(false), IsAlmostTrailingComment(false), + IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false), ParseAllComments(ParseAllComments) { // Extract raw comment text, if possible. if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) { @@ -75,17 +117,34 @@ RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR, return; } + // Guess comment kind. + std::pair K = getCommentKind(RawText, ParseAllComments); + + // Guess whether an ordinary comment is trailing. + if (ParseAllComments && isOrdinaryKind(K.first)) { + FileID BeginFileID; + unsigned BeginOffset; + std::tie(BeginFileID, BeginOffset) = + SourceMgr.getDecomposedLoc(Range.getBegin()); + if (BeginOffset != 0) { + bool Invalid = false; + const char *Buffer = + SourceMgr.getBufferData(BeginFileID, &Invalid).data(); + IsTrailingComment |= + (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset)); + } + } + if (!Merged) { - // Guess comment kind. - std::pair K = getCommentKind(RawText, ParseAllComments); Kind = K.first; - IsTrailingComment = K.second; + IsTrailingComment |= K.second; IsAlmostTrailingComment = RawText.startswith("//<") || RawText.startswith("/*<"); } else { Kind = RCK_Merged; - IsTrailingComment = mergedCommentIsTrailingComment(RawText); + IsTrailingComment = + IsTrailingComment || mergedCommentIsTrailingComment(RawText); } } @@ -239,9 +298,22 @@ void RawCommentList::addComment(const RawComment &RC, const RawComment &C2 = RC; // Merge comments only if there is only whitespace between them. - // Can't merge trailing and non-trailing comments. + // Can't merge trailing and non-trailing comments unless the second is + // non-trailing ordinary in the same column, as in the case: + // int x; // documents x + // // more text + // versus: + // int x; // documents x + // int y; // documents y + // or: + // int x; // documents x + // // documents y + // int y; // Merge comments if they are on same or consecutive lines. - if (C1.isTrailingComment() == C2.isTrailingComment() && + if ((C1.isTrailingComment() == C2.isTrailingComment() || + (C1.isTrailingComment() && !C2.isTrailingComment() && + isOrdinaryKind(C2.getKind()) && + commentsStartOnSameColumn(SourceMgr, C1, C2))) && onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(), /*MaxNewlinesAllowed=*/1)) { SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd()); diff --git a/test/Index/parse-all-comments.c b/test/Index/parse-all-comments.c index f8b0449f20..510a0afa63 100644 --- a/test/Index/parse-all-comments.c +++ b/test/Index/parse-all-comments.c @@ -33,6 +33,41 @@ void multi_line_comment_plus_ordinary(int); // WITH EMPTY LINE void multi_line_comment_empty_line(int); +int notdoxy7; // Not a Doxygen juxtaposed comment. notdoxy7 NOT_DOXYGEN +int notdoxy8; // Not a Doxygen juxtaposed comment. notdoxy8 NOT_DOXYGEN + +int trdoxy9; /// A Doxygen non-trailing comment. trdoxyA IS_DOXYGEN_SINGLE +int trdoxyA; + +int trdoxyB; // Not a Doxygen trailing comment. PART_ONE + // It's a multiline one too. trdoxyB NOT_DOXYGEN +int trdoxyC; + +int trdoxyD; // Not a Doxygen trailing comment. trdoxyD NOT_DOXYGEN + /// This comment doesn't get merged. trdoxyE IS_DOXYGEN +int trdoxyE; + +int trdoxyF; /// A Doxygen non-trailing comment that gets dropped on the floor. + // This comment will also be dropped. +int trdoxyG; // This one won't. trdoxyG NOT_DOXYGEN + +int trdoxyH; ///< A Doxygen trailing comment. PART_ONE + // This one gets merged with it. trdoxyH SOME_DOXYGEN +int trdoxyI; // This one doesn't. trdoxyI NOT_DOXYGEN + +int trdoxyJ; // Not a Doxygen trailing comment. PART_ONE + ///< This one gets merged with it. trdoxyJ SOME_DOXYGEN +int trdoxyK; // This one doesn't. trdoxyK NOT_DOXYGEN + +int trdoxyL; // Not a Doxygen trailing comment. trdoxyL NOT_DOXYGEN +// This one shouldn't get merged. trdoxyM NOT_DOXYGEN +int trdoxyM; + +int trdoxyN; ///< A Doxygen trailing comment. trdoxyN IS_DOXYGEN + // This one shouldn't get merged. trdoxyO NOT_DOXYGEN +int trdoxyO; + + #endif // RUN: rm -rf %t @@ -60,3 +95,21 @@ void multi_line_comment_empty_line(int); // CHECK: parse-all-comments.c:22:6: FunctionDecl=isdoxy6:{{.*}} isdoxy6 IS_DOXYGEN_SINGLE // CHECK: parse-all-comments.c:29:6: FunctionDecl=multi_line_comment_plus_ordinary:{{.*}} BLOCK_ORDINARY_COMMENT {{.*}} ORDINARY COMMENT {{.*}} IS_DOXYGEN_START {{.*}} IS_DOXYGEN_END // CHECK: parse-all-comments.c:34:6: FunctionDecl=multi_line_comment_empty_line:{{.*}} MULTILINE COMMENT{{.*}}\n{{.*}}\n{{.*}} WITH EMPTY LINE +// CHECK: parse-all-comments.c:36:5: VarDecl=notdoxy7:{{.*}} notdoxy7 NOT_DOXYGEN +// CHECK: parse-all-comments.c:37:5: VarDecl=notdoxy8:{{.*}} notdoxy8 NOT_DOXYGEN +// CHECK-NOT: parse-all-comments.c:39:5: VarDecl=trdoxy9:{{.*}} trdoxyA IS_DOXYGEN_SINGLE +// CHECK: parse-all-comments.c:40:5: VarDecl=trdoxyA:{{.*}} trdoxyA IS_DOXYGEN_SINGLE +// CHECK: parse-all-comments.c:42:5: VarDecl=trdoxyB:{{.*}} PART_ONE {{.*}} trdoxyB NOT_DOXYGEN +// CHECK-NOT: parse-all-comments.c:44:5: VarDecl=trdoxyC:{{.*}} trdoxyB NOT_DOXYGEN +// CHECK: parse-all-comments.c:46:5: VarDecl=trdoxyD:{{.*}} trdoxyD NOT_DOXYGEN +// CHECK: parse-all-comments.c:48:5: VarDecl=trdoxyE:{{.*}} trdoxyE IS_DOXYGEN +// CHECK-NOT: parse-all-comments.c:50:5: VarDecl=trdoxyF:{{.*}} RawComment +// CHECK: parse-all-comments.c:52:5: VarDecl=trdoxyG:{{.*}} trdoxyG NOT_DOXYGEN +// CHECK: parse-all-comments.c:54:5: VarDecl=trdoxyH:{{.*}} PART_ONE {{.*}} trdoxyH SOME_DOXYGEN +// CHECK: parse-all-comments.c:56:5: VarDecl=trdoxyI:{{.*}} trdoxyI NOT_DOXYGEN +// CHECK: parse-all-comments.c:58:5: VarDecl=trdoxyJ:{{.*}} PART_ONE {{.*}} trdoxyJ SOME_DOXYGEN +// CHECK: parse-all-comments.c:60:5: VarDecl=trdoxyK:{{.*}} trdoxyK NOT_DOXYGEN +// CHECK: parse-all-comments.c:62:5: VarDecl=trdoxyL:{{.*}} trdoxyL NOT_DOXYGEN +// CHECK: parse-all-comments.c:64:5: VarDecl=trdoxyM:{{.*}} trdoxyM NOT_DOXYGEN +// CHECK: parse-all-comments.c:66:5: VarDecl=trdoxyN:{{.*}} trdoxyN IS_DOXYGEN +// CHECK: parse-all-comments.c:68:5: VarDecl=trdoxyO:{{.*}} trdoxyO NOT_DOXYGEN