From: Volodymyr Sapsai Date: Fri, 12 Jan 2018 18:54:35 +0000 (+0000) Subject: [Lex] Avoid out-of-bounds dereference in LexAngledStringLiteral. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95cd7fb4765e803ec208b6289ca5d5a5bf5ca189;p=clang [Lex] Avoid out-of-bounds dereference in LexAngledStringLiteral. Fix makes the loop in LexAngledStringLiteral more like the loops in LexStringLiteral, LexCharConstant. When we skip a character after backslash, we need to check if we reached the end of the file instead of reading the next character unconditionally. Discovered by OSS-Fuzz: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=3832 rdar://problem/35572754 Reviewers: arphaman, kcc, rsmith, dexonsmith Reviewed By: rsmith, dexonsmith Subscribers: cfe-commits, rsmith, dexonsmith Differential Revision: https://reviews.llvm.org/D41423 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@322390 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp index 830354ab23..8bd4ab0ff9 100644 --- a/lib/Lex/Lexer.cpp +++ b/lib/Lex/Lexer.cpp @@ -2009,18 +2009,21 @@ bool Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) { const char *AfterLessPos = CurPtr; char C = getAndAdvanceChar(CurPtr, Result); while (C != '>') { - // Skip escaped characters. - if (C == '\\' && CurPtr < BufferEnd) { - // Skip the escaped character. - getAndAdvanceChar(CurPtr, Result); - } else if (C == '\n' || C == '\r' || // Newline. - (C == 0 && (CurPtr-1 == BufferEnd || // End of file. - isCodeCompletionPoint(CurPtr-1)))) { + // Skip escaped characters. Escaped newlines will already be processed by + // getAndAdvanceChar. + if (C == '\\') + C = getAndAdvanceChar(CurPtr, Result); + + if (C == '\n' || C == '\r' || // Newline. + (C == 0 && (CurPtr-1 == BufferEnd || // End of file. + isCodeCompletionPoint(CurPtr-1)))) { // If the filename is unterminated, then it must just be a lone < // character. Return this as such. FormTokenWithChars(Result, AfterLessPos, tok::less); return true; - } else if (C == 0) { + } + + if (C == 0) { NulCharacter = CurPtr-1; } C = getAndAdvanceChar(CurPtr, Result); diff --git a/test/Lexer/null-character-in-literal.c b/test/Lexer/null-character-in-literal.c new file mode 100644 index 0000000000..a479547536 Binary files /dev/null and b/test/Lexer/null-character-in-literal.c differ diff --git a/unittests/Lex/LexerTest.cpp b/unittests/Lex/LexerTest.cpp index 746d08a43e..216672a90d 100644 --- a/unittests/Lex/LexerTest.cpp +++ b/unittests/Lex/LexerTest.cpp @@ -475,6 +475,8 @@ TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { TEST_F(LexerTest, AvoidPastEndOfStringDereference) { EXPECT_TRUE(Lex(" // \\\n").empty()); + EXPECT_TRUE(Lex("#include <\\\\").empty()); + EXPECT_TRUE(Lex("#include <\\\\\n").empty()); } TEST_F(LexerTest, StringizingRasString) {