From 983f9af008ef293025c0e0198f0726ee64ffc793 Mon Sep 17 00:00:00 2001
From: Reid Kleckner
Date: Mon, 16 Oct 2017 22:20:03 +0000
Subject: [PATCH] [MC] Lex CRLF as one token

This will prevent doubling of line endings when parsing assembly and
emitting assembly. Otherwise we'd parse the directive, consume the end
of statement, hit the next end of statement, and emit a fresh newline.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315943 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/MC/MCParser/AsmLexer.cpp | 10 +++++++++-
 test/MC/X86/crlf.test        |  5 +++++
 2 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 test/MC/X86/crlf.test

diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index e9123b9d714..b83b6d3dcf6 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -606,8 +606,16 @@ AsmToken AsmLexer::LexToken() {
       return LexToken(); // Ignore whitespace.
     else
       return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
+  case '\r': {
+    IsAtStartOfLine = true;
+    IsAtStartOfStatement = true;
+    // If this is a CR followed by LF, treat that as one token.
+    if (CurPtr != CurBuf.end() && *CurPtr == '\n')
+      ++CurPtr;
+    return AsmToken(AsmToken::EndOfStatement,
+                    StringRef(TokStart, CurPtr - TokStart));
+  }
   case '\n':
-  case '\r':
     IsAtStartOfLine = true;
     IsAtStartOfStatement = true;
     return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
diff --git a/test/MC/X86/crlf.test b/test/MC/X86/crlf.test
new file mode 100644
index 00000000000..66014456138
--- /dev/null
+++ b/test/MC/X86/crlf.test
@@ -0,0 +1,5 @@
+RUN: printf '\r\n\r\n' | llvm-mc -as-lex | FileCheck %s
+There should only be two end of statements.
+CHECK: EndOfStatement
+CHECK: EndOfStatement
+CHECK-NOT: EndOfStatement
-- 
2.50.1