From: Dmitri Gribenko Date: Tue, 22 Apr 2014 10:59:13 +0000 (+0000) Subject: Comment parsing: in the generated XML file, mark HTML that is safe to pass X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e36bbd1eec01bfb06927de7791ec13135198fa68;p=clang Comment parsing: in the generated XML file, mark HTML that is safe to pass through to the output even if the input comment comes from an untrusted source Attribute filtering is currently based on a blacklist, which right now includes all event handler attributes (they contain JavaScipt code). It should be switched to a whitelist, but going over all of the HTML5 spec requires a significant amount of time. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@206882 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/bindings/xml/comment-xml-schema.rng b/bindings/xml/comment-xml-schema.rng index a8913a360b..29a91bf674 100644 --- a/bindings/xml/comment-xml-schema.rng +++ b/bindings/xml/comment-xml-schema.rng @@ -580,6 +580,12 @@ + + + + + + .*\S.* diff --git a/include/clang/AST/Comment.h b/include/clang/AST/Comment.h index 50e9196b1a..90dfb2570a 100644 --- a/include/clang/AST/Comment.h +++ b/include/clang/AST/Comment.h @@ -100,16 +100,27 @@ protected: }; enum { NumInlineCommandCommentBits = NumInlineContentCommentBits + 10 }; + class HTMLTagCommentBitfields { + friend class HTMLTagComment; + + unsigned : NumInlineContentCommentBits; + + /// True if this tag is safe to pass through to HTML output even if the + /// comment comes from an untrusted source. + unsigned IsSafeToPassThrough : 1; + }; + enum { NumHTMLTagCommentBits = NumInlineContentCommentBits + 1 }; + class HTMLStartTagCommentBitfields { friend class HTMLStartTagComment; - unsigned : NumInlineContentCommentBits; + unsigned : NumHTMLTagCommentBits; /// True if this tag is self-closing (e. g.,
). This is based on tag /// spelling in comment (plain
would not set this flag). unsigned IsSelfClosing : 1; }; - enum { NumHTMLStartTagCommentBits = NumInlineContentCommentBits + 1 }; + enum { NumHTMLStartTagCommentBits = NumHTMLTagCommentBits + 1 }; class ParagraphCommentBitfields { friend class ParagraphComment; @@ -155,6 +166,7 @@ protected: InlineContentCommentBitfields InlineContentCommentBits; TextCommentBitfields TextCommentBits; InlineCommandCommentBitfields InlineCommandCommentBits; + HTMLTagCommentBitfields HTMLTagCommentBits; HTMLStartTagCommentBitfields HTMLStartTagCommentBits; ParagraphCommentBitfields ParagraphCommentBits; BlockCommandCommentBitfields BlockCommandCommentBits; @@ -360,8 +372,7 @@ public: }; /// Abstract class for opening and closing HTML tags. HTML tags are always -/// treated as inline content (regardless HTML semantics); opening and closing -/// tags are not matched. +/// treated as inline content (regardless HTML semantics). class HTMLTagComment : public InlineContentComment { protected: StringRef TagName; @@ -377,6 +388,7 @@ protected: TagName(TagName), TagNameRange(TagNameBegin, TagNameEnd) { setLocation(TagNameBegin); + HTMLTagCommentBits.IsSafeToPassThrough = 1; } public: @@ -392,6 +404,14 @@ public: return SourceRange(L.getLocWithOffset(1), L.getLocWithOffset(1 + TagName.size())); } + + bool isSafeToPassThrough() const { + return HTMLTagCommentBits.IsSafeToPassThrough; + } + + void setUnsafeToPassThrough() { + HTMLTagCommentBits.IsSafeToPassThrough = 0; + } }; /// An opening HTML tag with attributes. diff --git a/include/clang/AST/CommentHTMLTags.td b/include/clang/AST/CommentHTMLTags.td index f98e32ddca..79951b80ee 100644 --- a/include/clang/AST/CommentHTMLTags.td +++ b/include/clang/AST/CommentHTMLTags.td @@ -52,3 +52,83 @@ def Tr : Tag<"tr"> { let EndTagOptional = 1; } def Th : Tag<"th"> { let EndTagOptional = 1; } def Td : Tag<"td"> { let EndTagOptional = 1; } +// Define a blacklist of attributes that are not safe to pass through to HTML +// output if the input is untrusted. +// +// FIXME: this should be a whitelist. When changing this to a whitelist, don't +// forget to change the default in the TableGen backend. +class Attribute { + string Spelling = spelling; + bit IsSafeToPassThrough = 1; +} +class EventHandlerContentAttribute : Attribute { + let IsSafeToPassThrough = 0; +} + +// This list is based on HTML5 draft as of 04 February 2014. +// +// The list is intentionally organized as one item per line to make it easier +// to compare with the HTML spec. +foreach AttrName = [ + "onabort", + "onblur", + "oncancel", + "oncanplay", + "oncanplaythrough", + "onchange", + "onclick", + "onclose", + "oncuechange", + "ondblclick", + "ondrag", + "ondragend", + "ondragenter", + "ondragexit", + "ondragleave", + "ondragover", + "ondragstart", + "ondrop", + "ondurationchange", + "onemptied", + "onended", + "onerror", + "onfocus", + "oninput", + "oninvalid", + "onkeydown", + "onkeypress", + "onkeyup", + "onload", + "onloadeddata", + "onloadedmetadata", + "onloadstart", + "onmousedown", + "onmouseenter", + "onmouseleave", + "onmousemove", + "onmouseout", + "onmouseover", + "onmouseup", + "onmousewheel", + "onpause", + "onplay", + "onplaying", + "onprogress", + "onratechange", + "onreset", + "onresize", + "onscroll", + "onseeked", + "onseeking", + "onselect", + "onshow", + "onstalled", + "onsubmit", + "onsuspend", + "ontimeupdate", + "ontoggle", + "onvolumechange", + "onwaiting" + ] in { + def Attr#AttrName : EventHandlerContentAttribute; +} diff --git a/include/clang/Basic/DiagnosticCommentKinds.td b/include/clang/Basic/DiagnosticCommentKinds.td index 49781fec9a..49296f9b5e 100644 --- a/include/clang/Basic/DiagnosticCommentKinds.td +++ b/include/clang/Basic/DiagnosticCommentKinds.td @@ -41,6 +41,10 @@ def warn_doc_html_start_end_mismatch : Warning< def note_doc_html_end_tag : Note< "end tag">; +def warn_doc_html_missing_end_tag : Warning< + "HTML tag '%0' requires an end tag">, + InGroup, DefaultIgnore; + // Commands def warn_doc_block_command_empty_paragraph : Warning< diff --git a/lib/AST/CommentSema.cpp b/lib/AST/CommentSema.cpp index e51f8781c1..5a0bf37739 100644 --- a/lib/AST/CommentSema.cpp +++ b/lib/AST/CommentSema.cpp @@ -467,6 +467,11 @@ void Sema::actOnHTMLStartTagFinish( SourceLocation GreaterLoc, bool IsSelfClosing) { Tag->setAttrs(Attrs); + for (const auto &Attr : Attrs) { + if (!isHTMLAttributeSafeToPassThrough(Attr.Name)) + Tag->setUnsafeToPassThrough(); + } + Tag->setGreaterLoc(GreaterLoc); if (IsSelfClosing) Tag->setSelfClosing(); @@ -482,6 +487,7 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin, if (isHTMLEndTagForbidden(TagName)) { Diag(HET->getLocation(), diag::warn_doc_html_end_forbidden) << TagName << HET->getSourceRange(); + HET->setUnsafeToPassThrough(); return HET; } @@ -497,14 +503,19 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin, if (!FoundOpen) { Diag(HET->getLocation(), diag::warn_doc_html_end_unbalanced) << HET->getSourceRange(); + HET->setUnsafeToPassThrough(); return HET; } while (!HTMLOpenTags.empty()) { - const HTMLStartTagComment *HST = HTMLOpenTags.pop_back_val(); + HTMLStartTagComment *HST = HTMLOpenTags.pop_back_val(); StringRef LastNotClosedTagName = HST->getTagName(); - if (LastNotClosedTagName == TagName) + if (LastNotClosedTagName == TagName) { + // If the start tag is unsafe, end tag is unsafe as well. + if (!HST->isSafeToPassThrough()) + HET->setUnsafeToPassThrough(); break; + } if (isHTMLEndTagOptional(LastNotClosedTagName)) continue; @@ -518,16 +529,18 @@ HTMLEndTagComment *Sema::actOnHTMLEndTag(SourceLocation LocBegin, HET->getLocation(), &CloseLineInvalid); - if (OpenLineInvalid || CloseLineInvalid || OpenLine == CloseLine) + if (OpenLineInvalid || CloseLineInvalid || OpenLine == CloseLine) { Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch) << HST->getTagName() << HET->getTagName() << HST->getSourceRange() << HET->getSourceRange(); - else { + HST->setUnsafeToPassThrough(); + } else { Diag(HST->getLocation(), diag::warn_doc_html_start_end_mismatch) << HST->getTagName() << HET->getTagName() << HST->getSourceRange(); Diag(HET->getLocation(), diag::note_doc_html_end_tag) << HET->getSourceRange(); + HST->setUnsafeToPassThrough(); } } @@ -538,6 +551,18 @@ FullComment *Sema::actOnFullComment( ArrayRef Blocks) { FullComment *FC = new (Allocator) FullComment(Blocks, ThisDeclInfo); resolveParamCommandIndexes(FC); + + // Complain about HTML tags that are not closed. + while (!HTMLOpenTags.empty()) { + HTMLStartTagComment *HST = HTMLOpenTags.pop_back_val(); + if (isHTMLEndTagOptional(HST->getTagName())) + continue; + + Diag(HST->getLocation(), diag::warn_doc_html_missing_end_tag) + << HST->getTagName() << HST->getSourceRange(); + HST->setUnsafeToPassThrough(); + } + return FC; } diff --git a/lib/Index/CommentToXML.cpp b/lib/Index/CommentToXML.cpp index 43c423274d..377440f81d 100644 --- a/lib/Index/CommentToXML.cpp +++ b/lib/Index/CommentToXML.cpp @@ -667,14 +667,20 @@ void CommentASTToXMLConverter::visitInlineCommandComment( void CommentASTToXMLConverter::visitHTMLStartTagComment( const HTMLStartTagComment *C) { - Result << "isSafeToPassThrough()) + Result << " isSafeToPassThrough=\"1\""; + Result << ">"; } void CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) { - Result << "</" << C->getTagName() << ">"; + Result << "isSafeToPassThrough()) + Result << " isSafeToPassThrough=\"1\""; + Result << "></" << C->getTagName() << ">"; } void diff --git a/test/Index/Inputs/CommentXML/valid-function-02.xml b/test/Index/Inputs/CommentXML/valid-function-02.xml index 989d6a7c14..98e4fd1c1e 100644 --- a/test/Index/Inputs/CommentXML/valid-function-02.xml +++ b/test/Index/Inputs/CommentXML/valid-function-02.xml @@ -1,5 +1,14 @@ aaa -Aaa bbb ccc ddd. + + Aaa + bbb + ccc + ddd + <eee> + <fff> + <ggg>. + + diff --git a/test/Index/comment-to-html-xml-conversion.cpp b/test/Index/comment-to-html-xml-conversion.cpp index 327fa64483..590e187197 100644 --- a/test/Index/comment-to-html-xml-conversion.cpp +++ b/test/Index/comment-to-html-xml-conversion.cpp @@ -472,7 +472,7 @@ void test_full_comment_1(int x1, int x2); ///
Aaa void comment_to_html_conversion_24(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[


Aaa

] FullCommentAsXML=[comment_to_html_conversion_24c:@F@comment_to_html_conversion_24#void comment_to_html_conversion_24() ]]>]]>Aaa</a>] +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[


Aaa

] FullCommentAsXML=[comment_to_html_conversion_24c:@F@comment_to_html_conversion_24#void comment_to_html_conversion_24() ]]>]]>Aaa</a>] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph @@ -678,7 +678,7 @@ void comment_to_html_conversion_33(); /// 0<i void comment_to_html_conversion_34(); -// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_34:{{.*}} FullCommentAsHTML=[

0<i

] FullCommentAsXML=[comment_to_html_conversion_34c:@F@comment_to_html_conversion_34#void comment_to_html_conversion_34() ]]>0<i</em>] +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-2]]:6: FunctionDecl=comment_to_html_conversion_34:{{.*}} FullCommentAsHTML=[

0<i

] FullCommentAsXML=[comment_to_html_conversion_34c:@F@comment_to_html_conversion_34#void comment_to_html_conversion_34() ]]>0<i</em>] // CHECK-NEXT: CommentAST=[ // CHECK-NEXT: (CXComment_FullComment // CHECK-NEXT: (CXComment_Paragraph @@ -853,6 +853,34 @@ enum class comment_to_xml_conversion_17 { // CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:3: EnumConstantDecl=comment_to_xml_conversion_18:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_18c:@E@comment_to_xml_conversion_17@comment_to_xml_conversion_18comment_to_xml_conversion_18 Aaa.] }; +/// +void comment_to_xml_conversion_unsafe_html_01(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_01:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_01c:@F@comment_to_xml_conversion_unsafe_html_01#void comment_to_xml_conversion_unsafe_html_01() ]]>] + +/// Aaa +void comment_to_xml_conversion_unsafe_html_02(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_02:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_02c:@F@comment_to_xml_conversion_unsafe_html_02#void comment_to_xml_conversion_unsafe_html_02() ]]>]]>Aaa</em>] + +/// Aaa +void comment_to_xml_conversion_unsafe_html_03(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_03:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_03c:@F@comment_to_xml_conversion_unsafe_html_03#void comment_to_xml_conversion_unsafe_html_03() ]]>Aaa] + +/// Aaa +void comment_to_xml_conversion_unsafe_html_04(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_04:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_04c:@F@comment_to_xml_conversion_unsafe_html_04#void comment_to_xml_conversion_unsafe_html_04() ]]>Aaa</b></em>] + +/// Aaa +void comment_to_xml_conversion_unsafe_html_05(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_05:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_05c:@F@comment_to_xml_conversion_unsafe_html_05#void comment_to_xml_conversion_unsafe_html_05() ]]>Aaa</em></b>] + +/// +void comment_to_xml_conversion_unsafe_html_06(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_06:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_06c:@F@comment_to_xml_conversion_unsafe_html_06#void comment_to_xml_conversion_unsafe_html_06() </table>] + +///
Aaa
+void comment_to_xml_conversion_unsafe_html_07(); +// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_unsafe_html_07:{{.*}} FullCommentAsXML=[comment_to_xml_conversion_unsafe_html_07c:@F@comment_to_xml_conversion_unsafe_html_07#void comment_to_xml_conversion_unsafe_html_07() ]]>Aaa</div>] + //===--- // Check that we attach comments from the base class to derived classes if they don't have a comment. // rdar://13647476 diff --git a/test/Sema/warn-documentation.cpp b/test/Sema/warn-documentation.cpp index ed25d949f5..4375cfcf56 100644 --- a/test/Sema/warn-documentation.cpp +++ b/test/Sema/warn-documentation.cpp @@ -4,35 +4,43 @@ // RUN: c-index-test -test-load-source all -comments-xml-schema=%S/../../bindings/xml/comment-xml-schema.rng %s | FileCheck %s -check-prefix=WRONG // WRONG-NOT: CommentXMLInvalid +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{expected quoted string after equals sign}} ///
int test_html1(int); +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{expected quoted string after equals sign}} /// int test_html2(int); +// expected-warning@+3 {{HTML tag 'a' requires an end tag}} // expected-warning@+2 {{expected quoted string after equals sign}} // expected-warning@+1 {{HTML start tag prematurely ended, expected attribute name or '>'}} /// '}} /// int test_html4(int); +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{HTML start tag prematurely ended, expected attribute name or '>'}} /// int test_html5(int); +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{HTML start tag prematurely ended, expected attribute name or '>'}} /// int test_html6(int); +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{HTML start tag prematurely ended, expected attribute name or '>'}} /// int test_html7(int); +// expected-warning@+2 {{HTML tag 'a' requires an end tag}} // expected-warning@+1 {{HTML start tag prematurely ended, expected attribute name or '>'}} /// int test_html_nesting4(int); +// expected-warning@+3 {{HTML tag 'b' requires an end tag}} +// expected-warning@+2 {{HTML tag 'i' requires an end tag}} // expected-warning@+1 {{HTML end tag does not match any start tag}} /// Meow int test_html_nesting5(int); @@ -81,6 +91,9 @@ int test_html_nesting6(int); /// Meow int test_html_nesting7(int); +// expected-warning@+1 {{HTML tag 'b' requires an end tag}} +/// Meow +int test_html_nesting8(int); // expected-warning@+1 {{empty paragraph passed to '\brief' command}} /// \brief\returns Aaa diff --git a/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp b/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp index bfcd2cfd15..2a9110f8eb 100644 --- a/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp +++ b/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "TableGenBackends.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" #include "llvm/TableGen/TableGenBackend.h" @@ -19,14 +20,11 @@ using namespace llvm; -namespace clang { -void EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) { +void clang::EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) { std::vector Tags = Records.getAllDerivedDefinitions("Tag"); std::vector Matches; - for (std::vector::iterator I = Tags.begin(), E = Tags.end(); - I != E; ++I) { - Record &Tag = **I; - std::string Spelling = Tag.getValueAsString("Spelling"); + for (Record *Tag : Tags) { + std::string Spelling = Tag->getValueAsString("Spelling"); Matches.push_back(StringMatcher::StringPair(Spelling, "return true;")); } @@ -38,19 +36,17 @@ void EmitClangCommentHTMLTags(RecordKeeper &Records, raw_ostream &OS) { << "}\n\n"; } -void EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, - raw_ostream &OS) { +void clang::EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, + raw_ostream &OS) { std::vector Tags = Records.getAllDerivedDefinitions("Tag"); std::vector MatchesEndTagOptional; std::vector MatchesEndTagForbidden; - for (std::vector::iterator I = Tags.begin(), E = Tags.end(); - I != E; ++I) { - Record &Tag = **I; - std::string Spelling = Tag.getValueAsString("Spelling"); + for (Record *Tag : Tags) { + std::string Spelling = Tag->getValueAsString("Spelling"); StringMatcher::StringPair Match(Spelling, "return true;"); - if (Tag.getValueAsBit("EndTagOptional")) + if (Tag->getValueAsBit("EndTagOptional")) MatchesEndTagOptional.push_back(Match); - if (Tag.getValueAsBit("EndTagForbidden")) + if (Tag->getValueAsBit("EndTagForbidden")) MatchesEndTagForbidden.push_back(Match); } @@ -65,6 +61,21 @@ void EmitClangCommentHTMLTagsProperties(RecordKeeper &Records, StringMatcher("Name", MatchesEndTagForbidden, OS).Emit(); OS << " return false;\n" << "}\n\n"; + + std::vector Attributes = + Records.getAllDerivedDefinitions("Attribute"); + std::vector Matches; + for (Record *Attribute : Attributes) { + std::string Spelling = Attribute->getValueAsString("Spelling"); + if (!Attribute->getValueAsBit("IsSafeToPassThrough")) + Matches.push_back(StringMatcher::StringPair(Spelling, "return false;")); + } + + emitSourceFileHeader("HTML attribute name matcher", OS); + + OS << "bool isHTMLAttributeSafeToPassThrough(StringRef Name) {\n"; + StringMatcher("Name", Matches, OS).Emit(); + OS << " return true;\n" + << "}\n\n"; } -} // end namespace clang