From: Nirav Dave Date: Sat, 1 Oct 2016 00:42:32 +0000 (+0000) Subject: [MC] Prevent out of order HashDirective lexing in AsmLexer. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=28901d924e29c900e4c6b213bd4431edf00e5970;p=llvm [MC] Prevent out of order HashDirective lexing in AsmLexer. To lex hash directives we peek ahead to find component tokens, create a unified token, and unlex the peeked tokens so the parser does not need to parse the tokens then. Make sure we do not to lex another hash directive during peek operation. This fixes PR28921. Reviewers: rnk, loladiro Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D24839 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282992 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index 06937e25f59..029598c013d 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -32,7 +32,7 @@ class AsmLexer : public MCAsmLexer { bool IsAtStartOfLine; bool IsAtStartOfStatement; bool IsParsingMSInlineAsm; - + bool IsPeeking; void operator=(const AsmLexer&) = delete; AsmLexer(const AsmLexer&) = delete; diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index c1f71bcc368..d0c8bce0382 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -11,29 +11,29 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCParser/AsmLexer.h" -#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/Support/SMLoc.h" +#include "llvm/Support/SaveAndRestore.h" #include #include #include #include -#include #include +#include #include using namespace llvm; -AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) { - CurPtr = nullptr; - IsAtStartOfLine = true; - IsAtStartOfStatement = true; - IsParsingMSInlineAsm = false; +AsmLexer::AsmLexer(const MCAsmInfo &MAI) + : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), + IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), + IsPeeking(false) { AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); } @@ -487,17 +487,15 @@ StringRef AsmLexer::LexUntilEndOfLine() { size_t AsmLexer::peekTokens(MutableArrayRef Buf, bool ShouldSkipSpace) { - const char *SavedTokStart = TokStart; - const char *SavedCurPtr = CurPtr; - bool SavedAtStartOfLine = IsAtStartOfLine; - bool SavedAtStartOfStatement = IsAtStartOfStatement; - bool SavedSkipSpace = SkipSpace; - + SaveAndRestore SavedTokenStart(TokStart); + SaveAndRestore SavedCurPtr(CurPtr); + SaveAndRestore SavedAtStartOfLine(IsAtStartOfLine); + SaveAndRestore SavedAtStartOfStatement(IsAtStartOfStatement); + SaveAndRestore SavedSkipSpace(SkipSpace, ShouldSkipSpace); + SaveAndRestore SavedIsPeeking(IsPeeking, true); std::string SavedErr = getErr(); SMLoc SavedErrLoc = getErrLoc(); - SkipSpace = ShouldSkipSpace; - size_t ReadCount; for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { AsmToken Token = LexToken(); @@ -509,13 +507,6 @@ size_t AsmLexer::peekTokens(MutableArrayRef Buf, } SetError(SavedErrLoc, SavedErr); - - SkipSpace = SavedSkipSpace; - IsAtStartOfLine = SavedAtStartOfLine; - IsAtStartOfStatement = SavedAtStartOfStatement; - CurPtr = SavedCurPtr; - TokStart = SavedTokStart; - return ReadCount; } @@ -525,7 +516,7 @@ bool AsmLexer::isAtStartOfComment(const char *Ptr) { if (CommentString[1] == '\0') return CommentString[0] == Ptr[0]; - // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin + // Allow # preprocessor commments also be counted as comments for "##" cases if (CommentString[1] == '#') return CommentString[0] == Ptr[0]; @@ -542,7 +533,7 @@ AsmToken AsmLexer::LexToken() { // This always consumes at least one character. int CurChar = getNextChar(); - if (CurChar == '#' && IsAtStartOfStatement) { + if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { // If this starts with a '#', this may be a cpp // hash directive and otherwise a line comment. AsmToken TokenBuf[2]; diff --git a/test/MC/AsmParser/pr28921.s b/test/MC/AsmParser/pr28921.s new file mode 100644 index 00000000000..2fbb555f4f3 --- /dev/null +++ b/test/MC/AsmParser/pr28921.s @@ -0,0 +1,8 @@ +// RUN: llvm-mc -triple i386-unknown-unknown %s + +# 1 "kernel.S" +# 1 "" 1 +# 1 "kernel.S" 2 +## +# 10 "kernel.S" +##