const char *CurPtr;
StringRef CurBuf;
- bool isAtStartOfLine;
+ bool IsAtStartOfLine;
+ bool IsAtStartOfStatement;
void operator=(const AsmLexer&) = delete;
AsmLexer(const AsmLexer&) = delete;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
StringRef LexUntilEndOfStatement() override;
- StringRef LexUntilEndOfLine();
size_t peekTokens(MutableArrayRef<AsmToken> Buf,
bool ShouldSkipSpace = true) override;
- bool isAtStartOfComment(const char *Ptr);
- bool isAtStatementSeparator(const char *Ptr);
-
const MCAsmInfo &getMAI() const { return MAI; }
private:
+ bool isAtStartOfComment(const char *Ptr);
+ bool isAtStatementSeparator(const char *Ptr);
int getNextChar();
AsmToken ReturnError(const char *Loc, const std::string &Msg);
AsmToken LexQuote();
AsmToken LexFloatLiteral();
AsmToken LexHexFloatLiteral(bool NoIntDigits);
+
+ StringRef LexUntilEndOfLine();
};
} // end namespace llvm
// Real values.
Real,
+ // Comments
+ Comment,
+ HashDirective,
// No-value.
EndOfStatement,
Colon,
Space,
Plus, Minus, Tilde,
- Slash, // '/'
+ Slash, // '/'
BackSlash, // '\'
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
Star, Dot, Comma, Dollar, Equal, EqualEqual,
const AsmToken &Lex() {
assert(!CurTok.empty());
CurTok.erase(CurTok.begin());
- if (CurTok.empty())
- CurTok.emplace_back(LexToken());
+ // LexToken may generate multiple tokens via UnLex but will always return
+ // the first one. Place returned value at head of CurTok vector.
+ if (CurTok.empty()) {
+ AsmToken T = LexToken();
+ CurTok.insert(CurTok.begin(), T);
+ }
return CurTok.front();
}
AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
CurPtr = nullptr;
- isAtStartOfLine = true;
+ IsAtStartOfLine = true;
+ IsAtStartOfStatement = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
}
int AsmLexer::getNextChar() {
- char CurChar = *CurPtr++;
- switch (CurChar) {
- default:
- return (unsigned char)CurChar;
- case 0:
- // A nul character in the stream is either the end of the current buffer or
- // a random nul in the file. Disambiguate that here.
- if (CurPtr - 1 != CurBuf.end())
- return 0; // Just whitespace.
-
- // Otherwise, return end of file.
- --CurPtr; // Another call to lex will return EOF again.
+ if (CurPtr == CurBuf.end())
return EOF;
- }
+ return (unsigned char)*CurPtr++;
}
/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
AsmToken AsmLexer::LexSlash() {
switch (*CurPtr) {
case '*':
+ IsAtStartOfStatement = false;
break; // C style comment.
case '/':
++CurPtr;
return LexLineComment();
default:
- return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1));
+ IsAtStartOfStatement = false;
+ return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
}
// C Style comment.
++CurPtr; // skip the star.
- while (1) {
- int CurChar = getNextChar();
- switch (CurChar) {
- case EOF:
- return ReturnError(TokStart, "unterminated comment");
+ while (CurPtr != CurBuf.end()) {
+ switch (*CurPtr++) {
case '*':
// End of the comment?
- if (CurPtr[0] != '/') break;
-
+ if (*CurPtr != '/')
+ break;
++CurPtr; // End the */.
- return LexToken();
+ return AsmToken(AsmToken::Comment,
+ StringRef(TokStart, CurPtr - TokStart));
}
}
+ return ReturnError(TokStart, "unterminated comment");
}
/// LexLineComment: Comment: #[^\n]*
/// : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
- // FIXME: This is broken if we happen to a comment at the end of a file, which
- // was .included, and which doesn't end with a newline.
+ // Mark This as an end of statement with a body of the
+ // comment. While it would be nicer to leave this two tokens,
+ // backwards compatability with TargetParsers makes keeping this in this form
+ // better.
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
- if (CurChar == EOF)
- return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
- return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
+ IsAtStartOfLine = true;
+ // Whis is a whole line comment. leave newline
+ if (IsAtStartOfStatement)
+ return AsmToken(AsmToken::EndOfStatement,
+ StringRef(TokStart, CurPtr - TokStart));
+ IsAtStartOfStatement = true;
+
+ return AsmToken(AsmToken::EndOfStatement,
+ StringRef(TokStart, CurPtr - 1 - TokStart));
}
static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
while (!isAtStartOfComment(CurPtr) && // Start of line comment.
!isAtStatementSeparator(CurPtr) && // End of statement marker.
- *CurPtr != '\n' && *CurPtr != '\r' &&
- (*CurPtr != 0 || CurPtr != CurBuf.end())) {
+ *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
StringRef AsmLexer::LexUntilEndOfLine() {
TokStart = CurPtr;
- while (*CurPtr != '\n' && *CurPtr != '\r' &&
- (*CurPtr != 0 || CurPtr != CurBuf.end())) {
+ while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
++CurPtr;
}
return StringRef(TokStart, CurPtr-TokStart);
bool ShouldSkipSpace) {
const char *SavedTokStart = TokStart;
const char *SavedCurPtr = CurPtr;
- bool SavedAtStartOfLine = isAtStartOfLine;
+ bool SavedAtStartOfLine = IsAtStartOfLine;
+ bool SavedAtStartOfStatement = IsAtStartOfStatement;
bool SavedSkipSpace = SkipSpace;
std::string SavedErr = getErr();
SetError(SavedErrLoc, SavedErr);
SkipSpace = SavedSkipSpace;
- isAtStartOfLine = SavedAtStartOfLine;
+ IsAtStartOfLine = SavedAtStartOfLine;
+ IsAtStartOfStatement = SavedAtStartOfStatement;
CurPtr = SavedCurPtr;
TokStart = SavedTokStart;
// This always consumes at least one character.
int CurChar = getNextChar();
- if (isAtStartOfComment(TokStart)) {
- // If this comment starts with a '#', then return the Hash token and let
- // the assembler parser see if it can be parsed as a cpp line filename
- // comment. We do this only if we are at the start of a line.
- if (CurChar == '#' && isAtStartOfLine)
- return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
- isAtStartOfLine = true;
+ if (CurChar == '#' && IsAtStartOfStatement) {
+ // If this starts with a '#', this may be a cpp
+ // hash directive and otherwise a line comment.
+ AsmToken TokenBuf[2];
+ MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
+ size_t num = peekTokens(Buf, true);
+ // There cannot be a space preceeding this
+ if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
+ TokenBuf[1].is(AsmToken::String)) {
+ CurPtr = TokStart; // reset curPtr;
+ StringRef s = LexUntilEndOfLine();
+ UnLex(TokenBuf[1]);
+ UnLex(TokenBuf[0]);
+ return AsmToken(AsmToken::HashDirective, s);
+ }
return LexLineComment();
}
+
+ if (isAtStartOfComment(TokStart))
+ return LexLineComment();
+
if (isAtStatementSeparator(TokStart)) {
CurPtr += strlen(MAI.getSeparatorString()) - 1;
+ IsAtStartOfLine = true;
+ IsAtStartOfStatement = true;
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, strlen(MAI.getSeparatorString())));
}
// If we're missing a newline at EOF, make sure we still get an
// EndOfStatement token before the Eof token.
- if (CurChar == EOF && !isAtStartOfLine) {
- isAtStartOfLine = true;
+ if (CurChar == EOF && !IsAtStartOfStatement) {
+ IsAtStartOfLine = true;
+ IsAtStartOfStatement = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
}
-
- isAtStartOfLine = false;
+ IsAtStartOfLine = false;
+ bool OldIsAtStartOfStatement = IsAtStartOfStatement;
+ IsAtStartOfStatement = false;
switch (CurChar) {
default:
// Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
// Unknown character, emit an error.
return ReturnError(TokStart, "invalid character in input");
- case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+ case EOF:
+ IsAtStartOfLine = true;
+ IsAtStartOfStatement = true;
+ return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
case 0:
case ' ':
case '\t':
- if (SkipSpace) {
- // Ignore whitespace.
- return LexToken();
- } else {
- int len = 1;
- while (*CurPtr==' ' || *CurPtr=='\t') {
- CurPtr++;
- len++;
- }
- return AsmToken(AsmToken::Space, StringRef(TokStart, len));
- }
- case '\n': // FALL THROUGH.
+ IsAtStartOfStatement = OldIsAtStartOfStatement;
+ while (*CurPtr == ' ' || *CurPtr == '\t')
+ CurPtr++;
+ if (SkipSpace)
+ return LexToken(); // Ignore whitespace.
+ else
+ return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
+ case '\n':
case '\r':
- isAtStartOfLine = true;
+ IsAtStartOfLine = true;
+ IsAtStartOfStatement = true;
return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
}
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
- case '/': return LexSlash();
+ case '/':
+ IsAtStartOfStatement = OldIsAtStartOfStatement;
+ return LexSlash();
case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
case '\'': return LexSingleQuote();
case '"': return LexQuote();
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
- void eatToEndOfLine();
bool parseCppHashLineFilenameComment(SMLoc L);
void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
Error(Lexer.getErrLoc(), Lexer.getErr());
const AsmToken *tok = &Lexer.Lex();
+ // Drop comments here.
+ while (tok->is(AsmToken::Comment)) {
+ tok = &Lexer.Lex();
+ }
if (tok->is(AsmToken::Eof)) {
// If this is the end of an included file, pop the parent file off the
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
jumpToLoc(ParentIncludeLoc);
- tok = &Lexer.Lex();
+ return Lex();
}
}
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
HadError |=
- Error(getLexer().getLoc(), "assembler local symbol '" +
- Sym->getName() + "' not defined");
+ Error(getTok().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined");
}
}
const char *Start = getTok().getLoc().getPointer();
while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
- Lex();
+ Lexer.Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
while (Lexer.isNot(AsmToken::EndOfStatement) &&
Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof))
- Lex();
+ Lexer.Lex();
const char *End = getTok().getLoc().getPointer();
return StringRef(Start, End - Start);
if (!MAI.useParensForSymbolVariant()) {
if (FirstTokenKind == AsmToken::String) {
if (Lexer.is(AsmToken::At)) {
- Lexer.Lex(); // eat @
+ Lex(); // eat @
SMLoc AtLoc = getLexer().getLoc();
StringRef VName;
if (parseIdentifier(VName))
Split = Identifier.split('@');
}
} else if (Lexer.is(AsmToken::LParen)) {
- Lexer.Lex(); // eat (
+ Lex(); // eat '('.
StringRef VName;
parseIdentifier(VName);
if (Lexer.isNot(AsmToken::RParen)) {
return Error(Lexer.getTok().getLoc(),
"unexpected token in variant, expected ')'");
}
- Lexer.Lex(); // eat )
+ Lex(); // eat ')'.
Split = std::make_pair(Identifier, VName);
}
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI) {
+ // Eat initial spaces and comments
+ while (Lexer.is(AsmToken::Space))
+ Lex();
if (Lexer.is(AsmToken::EndOfStatement)) {
- Out.AddBlankLine();
+ // if this is a line comment we can drop it safely
+ if (getTok().getString().front() == '\r' ||
+ getTok().getString().front() == '\n')
+ Out.AddBlankLine();
Lex();
return false;
}
-
- // Statements always start with an identifier or are a full line comment.
+ // Statements always start with an identifier.
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
int64_t LocalLabelVal = -1;
- // A full line comment is a '#' as the first token.
- if (Lexer.is(AsmToken::Hash))
+ if (Lexer.is(AsmToken::HashDirective))
return parseCppHashLineFilenameComment(IDLoc);
-
// Allow an integer followed by a ':' as a directional local label.
if (Lexer.is(AsmToken::Integer)) {
LocalLabelVal = getTok().getIntVal();
return parseDirectiveIncbin();
case DK_CODE16:
case DK_CODE16GCC:
- return TokError(Twine(IDVal) + " not supported yet");
+ return TokError(Twine(IDVal) +
+ " not currently supported for this target");
case DK_REPT:
return parseDirectiveRept(IDLoc, IDVal);
case DK_IRP:
return true;
}
-/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line
-/// since they may not be able to be tokenized to get to the end of line token.
-void AsmParser::eatToEndOfLine() {
- if (!Lexer.is(AsmToken::EndOfStatement))
- Lexer.LexUntilEndOfLine();
- // Eat EOL.
- Lex();
-}
-
/// parseCppHashLineFilenameComment as this:
/// ::= # number "filename"
-/// or just as a full line comment if it doesn't have a number and a string.
bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
Lex(); // Eat the hash token.
-
- if (getLexer().isNot(AsmToken::Integer)) {
- // Consume the line since in cases it is not a well-formed line directive,
- // as if were simply a full line comment.
- eatToEndOfLine();
- return false;
- }
-
+ // Lexer only ever emits HashDirective if it fully formed if it's
+ // done the checking already so this is an internal error.
+ assert(getTok().is(AsmToken::Integer) &&
+ "Lexing Cpp line comment: Expected Integer");
int64_t LineNumber = getTok().getIntVal();
Lex();
-
- if (getLexer().isNot(AsmToken::String)) {
- eatToEndOfLine();
- return false;
- }
-
+ assert(getTok().is(AsmToken::String) &&
+ "Lexing Cpp line comment: Expected String");
StringRef Filename = getTok().getString();
+ Lex();
// Get rid of the enclosing quotes.
Filename = Filename.substr(1, Filename.size() - 2);
CppHashInfo.Filename = Filename;
CppHashInfo.LineNumber = LineNumber;
CppHashInfo.Buf = CurBuffer;
-
- // Ignore any trailing characters, they're just comment.
- eatToEndOfLine();
return false;
}
break;
if (FAI >= NParameters) {
- assert(M && "expected macro to be defined");
+ assert(M && "expected macro to be defined");
Error(IDLoc,
"parameter named '" + FA.Name + "' does not exist for macro '" +
M->Name + "'");
// Construct the joined identifier and consume the token.
Res =
StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
- Lexer.Lex(); // Lexer's Lex guarantees consecutive token
+ Lex(); // Parser Lex to maintain invariants.
return false;
}
if (Lexer.isNot(AsmToken::Comma))
return TokError("expected comma");
- Lexer.Lex();
+ Lex();
if (Lexer.isNot(AsmToken::Identifier))
return TokError("expected relocation name");
SMLoc NameLoc = Lexer.getTok().getLoc();
StringRef Name = Lexer.getTok().getIdentifier();
- Lexer.Lex();
+ Lex();
if (Lexer.is(AsmToken::Comma)) {
- Lexer.Lex();
+ Lex();
SMLoc ExprLoc = Lexer.getLoc();
if (parseExpression(Expr))
return true;
bool parseAssignmentExpression(StringRef Name, bool allow_redef,
MCAsmParser &Parser, MCSymbol *&Sym,
const MCExpr *&Value) {
- MCAsmLexer &Lexer = Parser.getLexer();
// FIXME: Use better location, we should use proper tokens.
- SMLoc EqualLoc = Lexer.getLoc();
+ SMLoc EqualLoc = Parser.getTok().getLoc();
if (Parser.parseExpression(Value)) {
Parser.TokError("missing expression");
// a = b
// b = c
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement))
return Parser.TokError("unexpected token in assignment");
// Eat the end of statement marker.
StringRef Arch = Parser.getTok().getString();
SMLoc ArchLoc = Parser.getTok().getLoc();
- getLexer().Lex();
+ Lex();
unsigned ID = ARM::parseArch(Arch);
StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
- getLexer().Lex();
+ Lex();
bool EnableFeature = true;
if (Name.startswith_lower("no")) {
bool HexagonAsmParser::matchBundleOptions() {
MCAsmParser &Parser = getParser();
- MCAsmLexer &Lexer = getLexer();
while (true) {
if (!Parser.getTok().is(AsmToken::Colon))
return false;
- Lexer.Lex();
+ Lex();
StringRef Option = Parser.getTok().getString();
if (Option.compare_lower("endloop0") == 0)
HexagonMCInstrInfo::setInnerLoop(MCB);
HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
else
return true;
- Lexer.Lex();
+ Lex();
}
}
AsmToken const &Token = getParser().getTok();
StringRef String = Token.getString();
SMLoc Loc = Token.getLoc();
- getLexer().Lex();
+ Lex();
do {
std::pair<StringRef, StringRef> HeadTail = String.split('.');
if (!HeadTail.first.empty())
static char const * Comma = ",";
do {
Tokens.emplace_back (Lexer.getTok());
- Lexer.Lex();
+ Lex();
switch (Tokens.back().getKind())
{
case AsmToken::TokenKind::Hash:
AsmToken const &Token = Parser.getTok();
switch (Token.getKind()) {
case AsmToken::EndOfStatement: {
- Lexer.Lex();
+ Lex();
return false;
}
case AsmToken::LCurly: {
return true;
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
- Lexer.Lex();
+ Lex();
return false;
}
case AsmToken::RCurly: {
if (Operands.empty()) {
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
- Lexer.Lex();
+ Lex();
}
return false;
}
case AsmToken::Comma: {
- Lexer.Lex();
+ Lex();
continue;
}
case AsmToken::EqualEqual:
Token.getString().substr(0, 1), Token.getLoc()));
Operands.push_back(HexagonOperand::CreateToken(
Token.getString().substr(1, 1), Token.getLoc()));
- Lexer.Lex();
+ Lex();
continue;
}
case AsmToken::Hash: {
if (!ImplicitExpression)
Operands.push_back(
HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
- Lexer.Lex();
+ Lex();
bool MustExtend = false;
bool HiOnly = false;
bool LoOnly = false;
if (Lexer.is(AsmToken::Hash)) {
- Lexer.Lex();
+ Lex();
MustExtend = true;
} else if (ImplicitExpression)
MustNotExtend = true;
HiOnly = false;
LoOnly = false;
} else {
- Lexer.Lex();
+ Lex();
}
}
}
// Parse until end of statement, consuming commas between operands
while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.is(AsmToken::Comma)) {
// Consume comma token
- Lexer.Lex();
+ Lex();
// Parse next operand
if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success)
while (getLexer().isNot(AsmToken::EndOfStatement) &&
getLexer().is(AsmToken::Comma)) {
// Consume the comma token
- getLexer().Lex();
+ Lex();
// Parse the next operand
if (ParseOperand(Operands))
--- /dev/null
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+# 3 "FILE1" 1 #<- This is a CPP Hash w/ comment
+error
+# CHECK: FILE1:3:1: error
+# 0 "" 2 #<- This is too
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+ # 1 "FILE2" 2 #<- This is a comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+nop; # 6 "FILE3" 2 #<- This is a still comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+nop;# 6 "FILE4" 2
+ nop;
+error
+# CHECK: FILE4:7:1: error
+# 0 "" 2
+/*comment*/# 6 "FILE5" 2 #<- This is a comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error