Lexer(const Lexer&); // DO NOT IMPLEMENT
void operator=(const Lexer&); // DO NOT IMPLEMENT
+ /// Allocator for strings that are semantic values of tokens and have to be
+ /// computed (for example, resolved decimal character references).
+ llvm::BumpPtrAllocator &Allocator;
+
const char *const BufferStart;
const char *const BufferEnd;
SourceLocation FileLoc;
bool isVerbatimLineCommand(StringRef Name) const;
+ /// Given a character reference name (e.g., "lt"), return the character that
+ /// it stands for (e.g., "<").
+ StringRef resolveHTMLNamedCharacterReference(StringRef Name) const;
+
+ /// Given a Unicode codepoint as base-10 integer, return the character.
+ StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const;
+
+ /// Given a Unicode codepoint as base-16 integer, return the character.
+ StringRef resolveHTMLHexCharacterReference(StringRef Name) const;
+
void formTokenWithChars(Token &Result, const char *TokEnd,
tok::TokenKind Kind) {
const unsigned TokLen = TokEnd - BufferPtr;
BufferPtr = TokEnd;
}
+ void formTextToken(Token &Result, const char *TokEnd) {
+ StringRef Text(BufferPtr, TokEnd - BufferPtr);
+ formTokenWithChars(Result, TokEnd, tok::text);
+ Result.setText(Text);
+ }
+
SourceLocation getSourceLocation(const char *Loc) const {
assert(Loc >= BufferStart && Loc <= BufferEnd &&
"Location out of range for this buffer!");
void lexVerbatimLineText(Token &T);
+ void lexHTMLCharacterReference(Token &T);
+
void setupAndLexHTMLStartTag(Token &T);
void lexHTMLStartTag(Token &T);
void lexHTMLEndTag(Token &T);
public:
- Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
+ Lexer(llvm::BumpPtrAllocator &Allocator,
+ SourceLocation FileLoc, const CommentOptions &CommOpts,
const char *BufferStart, const char *BufferEnd);
void lex(Token &T);
return NULL;
const StringRef RawText = RC->getRawText(SourceMgr);
- comments::Lexer L(RC->getSourceRange().getBegin(), comments::CommentOptions(),
+ comments::Lexer L(getAllocator(),
+ RC->getSourceRange().getBegin(), comments::CommentOptions(),
RawText.begin(), RawText.end());
comments::Sema S(getAllocator(), getSourceManager(), getDiagnostics());
#include "clang/AST/CommentLexer.h"
+#include "clang/Basic/ConvertUTF.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
return false;
}
+namespace {
+bool isHTMLNamedCharacterReferenceCharacter(char C) {
+ return (C >= 'a' && C <= 'z') ||
+ (C >= 'A' && C <= 'Z');
+}
+
+bool isHTMLDecimalCharacterReferenceCharacter(char C) {
+ return C >= '0' && C <= '9';
+}
+
+bool isHTMLHexCharacterReferenceCharacter(char C) {
+ return (C >= '0' && C <= '9') ||
+ (C >= 'a' && C <= 'f') ||
+ (C >= 'A' && C <= 'F');
+}
+} // unnamed namespace
+
+StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
+ return llvm::StringSwitch<StringRef>(Name)
+ .Case("amp", "&")
+ .Case("lt", "<")
+ .Case("gt", ">")
+ .Case("quot", "\"")
+ .Case("apos", "\'")
+ .Default("");
+}
+
+StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
+ unsigned CodePoint = 0;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
+ CodePoint *= 10;
+ CodePoint += Name[i] - '0';
+ }
+
+ char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+ char *ResolvedPtr = Resolved;
+ if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+ return StringRef(Resolved, ResolvedPtr - Resolved);
+ else
+ return StringRef();
+}
+
+StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
+ unsigned CodePoint = 0;
+ for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ CodePoint *= 16;
+ const char C = Name[i];
+ assert(isHTMLHexCharacterReferenceCharacter(C));
+ if (C >= '0' && C <= '9')
+ CodePoint += Name[i] - '0';
+ else if (C >= 'a' && C <= 'f')
+ CodePoint += Name[i] - 'a' + 10;
+ else
+ CodePoint += Name[i] - 'A' + 10;
+ }
+
+ char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
+ char *ResolvedPtr = Resolved;
+ if (ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
+ return StringRef(Resolved, ResolvedPtr - Resolved);
+ else
+ return StringRef();
+}
+
void Lexer::skipLineStartingDecorations() {
// This function should be called only for C comments
assert(CommentState == LCS_InsideCComment);
return BufferPtr;
}
+const char *skipNamedCharacterReference(const char *BufferPtr,
+ const char *BufferEnd) {
+ for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+ if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
+ return BufferPtr;
+ }
+ return BufferEnd;
+}
+
+const char *skipDecimalCharacterReference(const char *BufferPtr,
+ const char *BufferEnd) {
+ for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+ if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
+ return BufferPtr;
+ }
+ return BufferEnd;
+}
+
+const char *skipHexCharacterReference(const char *BufferPtr,
+ const char *BufferEnd) {
+ for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
+ if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
+ return BufferPtr;
+ }
+ return BufferEnd;
+}
+
bool isHTMLIdentifierStartingCharacter(char C) {
return (C >= 'a' && C <= 'z') ||
(C >= 'A' && C <= 'Z');
case '@': {
TokenPtr++;
if (TokenPtr == CommentEnd) {
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
+ formTextToken(T, TokenPtr);
return;
}
char C = *TokenPtr;
// Don't make zero-length commands.
if (!isCommandNameCharacter(*TokenPtr)) {
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
+ formTextToken(T, TokenPtr);
return;
}
return;
}
+ case '&':
+ lexHTMLCharacterReference(T);
+ return;
+
case '<': {
TokenPtr++;
if (TokenPtr == CommentEnd) {
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
+ formTextToken(T, TokenPtr);
return;
}
const char C = *TokenPtr;
setupAndLexHTMLStartTag(T);
else if (C == '/')
setupAndLexHTMLEndTag(T);
- else {
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
- }
+ else
+ formTextToken(T, TokenPtr);
+
return;
}
break;
const char C = *TokenPtr;
if(C == '\n' || C == '\r' ||
- C == '\\' || C == '@' || C == '<')
+ C == '\\' || C == '@' || C == '&' || C == '<')
break;
}
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
+ formTextToken(T, TokenPtr);
return;
}
}
State = LS_Normal;
}
+void Lexer::lexHTMLCharacterReference(Token &T) {
+ const char *TokenPtr = BufferPtr;
+ assert(*TokenPtr == '&');
+ TokenPtr++;
+ if (TokenPtr == CommentEnd) {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ const char *NamePtr;
+ bool isNamed = false;
+ bool isDecimal = false;
+ char C = *TokenPtr;
+ if (isHTMLNamedCharacterReferenceCharacter(C)) {
+ NamePtr = TokenPtr;
+ TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
+ isNamed = true;
+ } else if (C == '#') {
+ TokenPtr++;
+ if (TokenPtr == CommentEnd) {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ C = *TokenPtr;
+ if (isHTMLDecimalCharacterReferenceCharacter(C)) {
+ NamePtr = TokenPtr;
+ TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
+ isDecimal = true;
+ } else if (C == 'x' || C == 'X') {
+ TokenPtr++;
+ NamePtr = TokenPtr;
+ TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
+ } else {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ } else {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
+ *TokenPtr != ';') {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ StringRef Name(NamePtr, TokenPtr - NamePtr);
+ TokenPtr++; // Skip semicolon.
+ StringRef Resolved;
+ if (isNamed)
+ Resolved = resolveHTMLNamedCharacterReference(Name);
+ else if (isDecimal)
+ Resolved = resolveHTMLDecimalCharacterReference(Name);
+ else
+ Resolved = resolveHTMLHexCharacterReference(Name);
+
+ if (Resolved.empty()) {
+ formTextToken(T, TokenPtr);
+ return;
+ }
+ formTokenWithChars(T, TokenPtr, tok::text);
+ T.setText(Resolved);
+ return;
+}
+
void Lexer::setupAndLexHTMLStartTag(Token &T) {
assert(BufferPtr[0] == '<' &&
isHTMLIdentifierStartingCharacter(BufferPtr[1]));
if (TokenPtr != CommentEnd && *TokenPtr == '>') {
TokenPtr++;
formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
- } else {
- StringRef Text(BufferPtr, TokenPtr - BufferPtr);
- formTokenWithChars(T, TokenPtr, tok::text);
- T.setText(Text);
- }
+ } else
+ formTextToken(T, TokenPtr);
+
State = LS_Normal;
return;
}
State = LS_Normal;
}
-Lexer::Lexer(SourceLocation FileLoc, const CommentOptions &CommOpts,
+Lexer::Lexer(llvm::BumpPtrAllocator &Allocator,
+ SourceLocation FileLoc, const CommentOptions &CommOpts,
const char *BufferStart, const char *BufferEnd):
+ Allocator(Allocator),
BufferStart(BufferStart), BufferEnd(BufferEnd),
FileLoc(FileLoc), CommOpts(CommOpts), BufferPtr(BufferStart),
CommentState(LCS_BeforeComment), State(LS_Normal) {
// Make sure that RawText is valid.
getRawText(Context.getSourceManager());
- comments::Lexer L(Range.getBegin(), comments::CommentOptions(),
+ // Since we will be copying the resulting text, all allocations made during
+ // parsing are garbage after resulting string is formed. Thus we can use
+ // a separate allocator for all temporary stuff.
+ llvm::BumpPtrAllocator Allocator;
+
+ comments::Lexer L(Allocator,
+ Range.getBegin(), comments::CommentOptions(),
RawText.begin(), RawText.end());
comments::BriefParser P(L);
/// & < > "
void comment_to_html_conversion_24();
+/// <em>0<i</em>
+void comment_to_html_conversion_25();
+
#endif
// RUN: rm -rf %t
// CHECK-NEXT: (CXComment_Text Text=[.])
// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
// CHECK-NEXT: (CXComment_Text Text=[::])))]
-// CHECK: annotate-comments.cpp:324:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> &amp; &lt; &gt; &quot;</p>]
-// CHECK: CommentAST=[
-// CHECK: (CXComment_FullComment
-// CHECK: (CXComment_Paragraph
-// CHECK: (CXComment_Text Text=[ & < > "])))]
+// CHECK: annotate-comments.cpp:324:6: FunctionDecl=comment_to_html_conversion_24:{{.*}} FullCommentAsHTML=[<p class="para-brief"> & < > "</p>]
+// CHECK-NEXT: CommentAST=[
+// CHECK-NEXT: (CXComment_FullComment
+// CHECK-NEXT: (CXComment_Paragraph
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[&])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[<])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=[>])
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_Text Text=["])))]
+// CHECK: annotate-comments.cpp:327:6: FunctionDecl=comment_to_html_conversion_25:{{.*}} FullCommentAsHTML=[<p class="para-brief"> <em>0<i</em></p>]
+// CHECK-NEXT: CommentAST=[
+// CHECK-NEXT: (CXComment_FullComment
+// CHECK-NEXT: (CXComment_Paragraph
+// CHECK-NEXT: (CXComment_Text Text=[ ] IsWhitespace)
+// CHECK-NEXT: (CXComment_HTMLStartTag Name=[em])
+// CHECK-NEXT: (CXComment_Text Text=[0])
+// CHECK-NEXT: (CXComment_Text Text=[<])
+// CHECK-NEXT: (CXComment_Text Text=[i])
+// CHECK-NEXT: (CXComment_HTMLEndTag Name=[em])))]
IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
DiagnosticsEngine Diags;
SourceManager SourceMgr;
+ llvm::BumpPtrAllocator Allocator;
void lexString(const char *Source, std::vector<Token> &Toks);
};
FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
- comments::Lexer L(Begin, CommentOptions(),
+ comments::Lexer L(Allocator, Begin, CommentOptions(),
Source, Source + strlen(Source));
while (1) {
}
}
+TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
+ const char *Source = "// &";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
+ const char *Source = "// &!";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("!"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
+ const char *Source = "// &";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
+ const char *Source = "// &!";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("!"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
+ const char *Source = "// &#";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&#"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
+ const char *Source = "// &#a";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&#"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("a"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
+ const char *Source = "// *";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("*"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
+ const char *Source = "// *a";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("*"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("a"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
+ const char *Source = "// &#x";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
+ const char *Source = "// &#xz";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&#x"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("z"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
+ const char *Source = "// «";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("«"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
+ const char *Source = "// «z";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("«"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("z"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
+ const char *Source = "// &";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
+ const char *Source = "// &<";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef("<"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
+ const char *Source = "// & meow";
+
+ std::vector<Token> Toks;
+
+ lexString(Source, Toks);
+
+ ASSERT_EQ(4U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("&"), Toks[1].getText());
+
+ ASSERT_EQ(tok::text, Toks[2].getKind());
+ ASSERT_EQ(StringRef(" meow"), Toks[2].getText());
+
+ ASSERT_EQ(tok::newline, Toks[3].getKind());
+}
+
+TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
+ const char *Sources[] = {
+ "// =",
+ "// =",
+ "// ="
+ };
+
+ for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+ std::vector<Token> Toks;
+
+ lexString(Sources[i], Toks);
+
+ ASSERT_EQ(3U, Toks.size());
+
+ ASSERT_EQ(tok::text, Toks[0].getKind());
+ ASSERT_EQ(StringRef(" "), Toks[0].getText());
+
+ ASSERT_EQ(tok::text, Toks[1].getKind());
+ ASSERT_EQ(StringRef("="), Toks[1].getText());
+
+ ASSERT_EQ(tok::newline, Toks[2].getKind());
+ }
+}
+
TEST_F(CommentLexerTest, MultipleComments) {
const char *Source =
"// Aaa\n"
FileID File = SourceMgr.createFileIDForMemBuffer(Buf);
SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
- comments::Lexer L(Begin, CommentOptions(),
+ comments::Lexer L(Allocator, Begin, CommentOptions(),
Source, Source + strlen(Source));
comments::Sema S(Allocator, SourceMgr, Diags);