Refactor and cleanup Assembly Parsing / Lexing

author Nirav Dave <niravd@google.com>

Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)

committer Nirav Dave <niravd@google.com>

Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)
author Nirav Dave <niravd@google.com>
Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)
committer Nirav Dave <niravd@google.com>
Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h

index 1bb6d212784eec327ae9fb2de4d1510a6c67fbec..c779121b6cf0df630b3d7560296a3b765b8596c4 100644 (file)
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -29,7 +29,8 @@ class AsmLexer : public MCAsmLexer {
  
    const char *CurPtr;
    StringRef CurBuf;
-  bool isAtStartOfLine;
+  bool IsAtStartOfLine;
+  bool IsAtStartOfStatement;
  
    void operator=(const AsmLexer&) = delete;
    AsmLexer(const AsmLexer&) = delete;
@@ -45,17 +46,15 @@ public:
    void setBuffer(StringRef Buf, const char *ptr = nullptr);
  
    StringRef LexUntilEndOfStatement() override;
-  StringRef LexUntilEndOfLine();
  
    size_t peekTokens(MutableArrayRef<AsmToken> Buf,
                      bool ShouldSkipSpace = true) override;
  
-  bool isAtStartOfComment(const char *Ptr);
-  bool isAtStatementSeparator(const char *Ptr);
-
    const MCAsmInfo &getMAI() const { return MAI; }
  
  private:
+  bool isAtStartOfComment(const char *Ptr);
+  bool isAtStatementSeparator(const char *Ptr);
    int getNextChar();
    AsmToken ReturnError(const char *Loc, const std::string &Msg);
  
@@ -67,6 +66,8 @@ private:
    AsmToken LexQuote();
    AsmToken LexFloatLiteral();
    AsmToken LexHexFloatLiteral(bool NoIntDigits);
+
+  StringRef LexUntilEndOfLine();
  };
  
  } // end namespace llvm
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h

index d632ddc9132e3a2d5cf4fd1fe021d859ce2df0ae..3dd22c93d363718787a59ba3cb3bb0496cda2a6e 100644 (file)
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -39,12 +39,15 @@ public:
      // Real values.
      Real,
  
+    // Comments
+    Comment,
+    HashDirective,
      // No-value.
      EndOfStatement,
      Colon,
      Space,
      Plus, Minus, Tilde,
-    Slash,    // '/'
+    Slash,     // '/'
      BackSlash, // '\'
      LParen, RParen, LBrac, RBrac, LCurly, RCurly,
      Star, Dot, Comma, Dollar, Equal, EqualEqual,
@@ -153,8 +156,12 @@ public:
    const AsmToken &Lex() {
      assert(!CurTok.empty());
      CurTok.erase(CurTok.begin());
-    if (CurTok.empty())
-      CurTok.emplace_back(LexToken());
+    // LexToken may generate multiple tokens via UnLex but will always return
+    // the first one. Place returned value at head of CurTok vector.
+    if (CurTok.empty()) {
+      AsmToken T = LexToken();
+      CurTok.insert(CurTok.begin(), T);
+    }
      return CurTok.front();
    }
  
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp

index f653304186bcb5a4b02e4a37e3676df08bb6a73e..d56071aea4df0602fbb3d5347f01425420fd93d5 100644 (file)
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -23,7 +23,8 @@ using namespace llvm;
  
  AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
    CurPtr = nullptr;
-  isAtStartOfLine = true;
+  IsAtStartOfLine = true;
+  IsAtStartOfStatement = true;
    AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
  }
  
@@ -50,20 +51,9 @@ AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
  }
  
  int AsmLexer::getNextChar() {
-  char CurChar = *CurPtr++;
-  switch (CurChar) {
-  default:
-    return (unsigned char)CurChar;
-  case 0:
-    // A nul character in the stream is either the end of the current buffer or
-    // a random nul in the file.  Disambiguate that here.
-    if (CurPtr - 1 != CurBuf.end())
-      return 0;  // Just whitespace.
-
-    // Otherwise, return end of file.
-    --CurPtr;  // Another call to lex will return EOF again.
+  if (CurPtr == CurBuf.end())
      return EOF;
-  }
+  return (unsigned char)*CurPtr++;
  }
  
  /// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
@@ -169,43 +159,52 @@ AsmToken AsmLexer::LexIdentifier() {
  AsmToken AsmLexer::LexSlash() {
    switch (*CurPtr) {
    case '*':
+    IsAtStartOfStatement = false;
      break; // C style comment.
    case '/':
      ++CurPtr;
      return LexLineComment();
    default:
-    return AsmToken(AsmToken::Slash, StringRef(CurPtr - 1, 1));
+    IsAtStartOfStatement = false;
+    return AsmToken(AsmToken::Slash, StringRef(TokStart, 1));
    }
  
    // C Style comment.
    ++CurPtr;  // skip the star.
-  while (1) {
-    int CurChar = getNextChar();
-    switch (CurChar) {
-    case EOF:
-      return ReturnError(TokStart, "unterminated comment");
+  while (CurPtr != CurBuf.end()) {
+    switch (*CurPtr++) {
      case '*':
        // End of the comment?
-      if (CurPtr[0] != '/') break;
-
+      if (*CurPtr != '/')
+        break;
        ++CurPtr;   // End the */.
-      return LexToken();
+      return AsmToken(AsmToken::Comment,
+                      StringRef(TokStart, CurPtr - TokStart));
      }
    }
+  return ReturnError(TokStart, "unterminated comment");
  }
  
  /// LexLineComment: Comment: #[^\n]*
  ///                        : //[^\n]*
  AsmToken AsmLexer::LexLineComment() {
-  // FIXME: This is broken if we happen to a comment at the end of a file, which
-  // was .included, and which doesn't end with a newline.
+  // Mark this as an end of statement with a body of the
+  // comment. While it would be nicer to leave this as two tokens,
+  // backwards compatibility with TargetParsers makes keeping this in this form
+  // better.
    int CurChar = getNextChar();
    while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
      CurChar = getNextChar();
  
-  if (CurChar == EOF)
-    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
-  return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0));
+  IsAtStartOfLine = true;
+  // This is a whole-line comment. Leave newline.
+  if (IsAtStartOfStatement)
+    return AsmToken(AsmToken::EndOfStatement,
+                    StringRef(TokStart, CurPtr - TokStart));
+  IsAtStartOfStatement = true;
+
+  return AsmToken(AsmToken::EndOfStatement,
+                  StringRef(TokStart, CurPtr - 1 - TokStart));
  }
  
  static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
@@ -423,8 +422,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
  
    while (!isAtStartOfComment(CurPtr) &&     // Start of line comment.
           !isAtStatementSeparator(CurPtr) && // End of statement marker.
-         *CurPtr != '\n' && *CurPtr != '\r' &&
-         (*CurPtr != 0 || CurPtr != CurBuf.end())) {
+         *CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
      ++CurPtr;
    }
    return StringRef(TokStart, CurPtr-TokStart);
@@ -433,8 +431,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
  StringRef AsmLexer::LexUntilEndOfLine() {
    TokStart = CurPtr;
  
-  while (*CurPtr != '\n' && *CurPtr != '\r' &&
-         (*CurPtr != 0 || CurPtr != CurBuf.end())) {
+  while (*CurPtr != '\n' && *CurPtr != '\r' && CurPtr != CurBuf.end()) {
      ++CurPtr;
    }
    return StringRef(TokStart, CurPtr-TokStart);
@@ -444,7 +441,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
                              bool ShouldSkipSpace) {
    const char *SavedTokStart = TokStart;
    const char *SavedCurPtr = CurPtr;
-  bool SavedAtStartOfLine = isAtStartOfLine;
+  bool SavedAtStartOfLine = IsAtStartOfLine;
+  bool SavedAtStartOfStatement = IsAtStartOfStatement;
    bool SavedSkipSpace = SkipSpace;
  
    std::string SavedErr = getErr();
@@ -465,7 +463,8 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
    SetError(SavedErrLoc, SavedErr);
  
    SkipSpace = SavedSkipSpace;
-  isAtStartOfLine = SavedAtStartOfLine;
+  IsAtStartOfLine = SavedAtStartOfLine;
+  IsAtStartOfStatement = SavedAtStartOfStatement;
    CurPtr = SavedCurPtr;
    TokStart = SavedTokStart;
  
@@ -495,29 +494,45 @@ AsmToken AsmLexer::LexToken() {
    // This always consumes at least one character.
    int CurChar = getNextChar();
  
-  if (isAtStartOfComment(TokStart)) {
-    // If this comment starts with a '#', then return the Hash token and let
-    // the assembler parser see if it can be parsed as a cpp line filename
-    // comment. We do this only if we are at the start of a line.
-    if (CurChar == '#' && isAtStartOfLine)
-      return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
-    isAtStartOfLine = true;
+  if (CurChar == '#' && IsAtStartOfStatement) {
+    // If this starts with a '#', this may be a cpp
+    // hash directive and otherwise a line comment.
+    AsmToken TokenBuf[2];
+    MutableArrayRef<AsmToken> Buf(TokenBuf, 2);
+    size_t num = peekTokens(Buf, true);
+    // There cannot be a space preceding this
+    if (IsAtStartOfLine && num == 2 && TokenBuf[0].is(AsmToken::Integer) &&
+        TokenBuf[1].is(AsmToken::String)) {
+      CurPtr = TokStart; // reset curPtr;
+      StringRef s = LexUntilEndOfLine();
+      UnLex(TokenBuf[1]);
+      UnLex(TokenBuf[0]);
+      return AsmToken(AsmToken::HashDirective, s);
+    }
      return LexLineComment();
    }
+
+  if (isAtStartOfComment(TokStart))
+    return LexLineComment();
+
    if (isAtStatementSeparator(TokStart)) {
      CurPtr += strlen(MAI.getSeparatorString()) - 1;
+    IsAtStartOfLine = true;
+    IsAtStartOfStatement = true;
      return AsmToken(AsmToken::EndOfStatement,
                      StringRef(TokStart, strlen(MAI.getSeparatorString())));
    }
  
    // If we're missing a newline at EOF, make sure we still get an
    // EndOfStatement token before the Eof token.
-  if (CurChar == EOF && !isAtStartOfLine) {
-    isAtStartOfLine = true;
+  if (CurChar == EOF && !IsAtStartOfStatement) {
+    IsAtStartOfLine = true;
+    IsAtStartOfStatement = true;
      return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
    }
-
-  isAtStartOfLine = false;
+  IsAtStartOfLine = false;
+  bool OldIsAtStartOfStatement = IsAtStartOfStatement;
+  IsAtStartOfStatement = false;
    switch (CurChar) {
    default:
      // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
@@ -526,24 +541,24 @@ AsmToken AsmLexer::LexToken() {
  
      // Unknown character, emit an error.
      return ReturnError(TokStart, "invalid character in input");
-  case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
+  case EOF:
+    IsAtStartOfLine = true;
+    IsAtStartOfStatement = true;
+    return AsmToken(AsmToken::Eof, StringRef(TokStart, 0));
    case 0:
    case ' ':
    case '\t':
-    if (SkipSpace) {
-      // Ignore whitespace.
-      return LexToken();
-    } else {
-      int len = 1;
-      while (*CurPtr==' ' || *CurPtr=='\t') {
-        CurPtr++;
-        len++;
-      }
-      return AsmToken(AsmToken::Space, StringRef(TokStart, len));
-    }
-  case '\n': // FALL THROUGH.
+    IsAtStartOfStatement = OldIsAtStartOfStatement;
+    while (*CurPtr == ' ' || *CurPtr == '\t')
+      CurPtr++;
+    if (SkipSpace)
+      return LexToken(); // Ignore whitespace.
+    else
+      return AsmToken(AsmToken::Space, StringRef(TokStart, CurPtr - TokStart));
+  case '\n':
    case '\r':
-    isAtStartOfLine = true;
+    IsAtStartOfLine = true;
+    IsAtStartOfStatement = true;
      return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1));
    case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1));
    case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1));
@@ -586,7 +601,9 @@ AsmToken AsmLexer::LexToken() {
      }
      return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
    case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
-  case '/': return LexSlash();
+  case '/':
+    IsAtStartOfStatement = OldIsAtStartOfStatement;
+    return LexSlash();
    case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
    case '\'': return LexSingleQuote();
    case '"': return LexQuote();
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp

index 5b8c4ee5df19b75d48c264fdc091bc593de9a49b..9a331ec44582d93f25a27d02d551cbf681533113 100644 (file)
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -257,7 +257,6 @@ private:
    bool parseStatement(ParseStatementInfo &Info,
                        MCAsmParserSemaCallback *SI);
    bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
-  void eatToEndOfLine();
    bool parseCppHashLineFilenameComment(SMLoc L);
  
    void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body,
@@ -628,6 +627,10 @@ const AsmToken &AsmParser::Lex() {
      Error(Lexer.getErrLoc(), Lexer.getErr());
  
    const AsmToken *tok = &Lexer.Lex();
+  // Drop comments here.
+  while (tok->is(AsmToken::Comment)) {
+    tok = &Lexer.Lex();
+  }
  
    if (tok->is(AsmToken::Eof)) {
      // If this is the end of an included file, pop the parent file off the
@@ -635,7 +638,7 @@ const AsmToken &AsmParser::Lex() {
      SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
      if (ParentIncludeLoc != SMLoc()) {
        jumpToLoc(ParentIncludeLoc);
-      tok = &Lexer.Lex();
+      return Lex();
      }
    }
  
@@ -720,8 +723,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
            // first referenced for a source location. We need to add something
            // to track that. Currently, we just point to the end of the file.
            HadError |=
-              Error(getLexer().getLoc(), "assembler local symbol '" +
-                                             Sym->getName() + "' not defined");
+              Error(getTok().getLoc(), "assembler local symbol '" +
+                                           Sym->getName() + "' not defined");
        }
      }
  
@@ -766,7 +769,7 @@ StringRef AsmParser::parseStringToEndOfStatement() {
    const char *Start = getTok().getLoc().getPointer();
  
    while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof))
-    Lex();
+    Lexer.Lex();
  
    const char *End = getTok().getLoc().getPointer();
    return StringRef(Start, End - Start);
@@ -777,7 +780,7 @@ StringRef AsmParser::parseStringToComma() {
  
    while (Lexer.isNot(AsmToken::EndOfStatement) &&
           Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof))
-    Lex();
+    Lexer.Lex();
  
    const char *End = getTok().getLoc().getPointer();
    return StringRef(Start, End - Start);
@@ -859,7 +862,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
      if (!MAI.useParensForSymbolVariant()) {
        if (FirstTokenKind == AsmToken::String) {
          if (Lexer.is(AsmToken::At)) {
-          Lexer.Lex(); // eat @
+          Lex(); // eat @
            SMLoc AtLoc = getLexer().getLoc();
            StringRef VName;
            if (parseIdentifier(VName))
@@ -871,14 +874,14 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
          Split = Identifier.split('@');
        }
      } else if (Lexer.is(AsmToken::LParen)) {
-      Lexer.Lex(); // eat (
+      Lex(); // eat '('.
        StringRef VName;
        parseIdentifier(VName);
        if (Lexer.isNot(AsmToken::RParen)) {
            return Error(Lexer.getTok().getLoc(),
                         "unexpected token in variant, expected ')'");
        }
-      Lexer.Lex(); // eat )
+      Lex(); // eat ')'.
        Split = std::make_pair(Identifier, VName);
      }
  
@@ -1343,21 +1346,24 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
  ///   ::= Label* Identifier OperandList* EndOfStatement
  bool AsmParser::parseStatement(ParseStatementInfo &Info,
                                 MCAsmParserSemaCallback *SI) {
+  // Eat initial spaces and comments
+  while (Lexer.is(AsmToken::Space))
+    Lex();
    if (Lexer.is(AsmToken::EndOfStatement)) {
-    Out.AddBlankLine();
+    // if this is a line comment we can drop it safely
+    if (getTok().getString().front() == '\r' ||
+        getTok().getString().front() == '\n')
+      Out.AddBlankLine();
      Lex();
      return false;
    }
-
-  // Statements always start with an identifier or are a full line comment.
+  // Statements always start with an identifier.
    AsmToken ID = getTok();
    SMLoc IDLoc = ID.getLoc();
    StringRef IDVal;
    int64_t LocalLabelVal = -1;
-  // A full line comment is a '#' as the first token.
-  if (Lexer.is(AsmToken::Hash))
+  if (Lexer.is(AsmToken::HashDirective))
      return parseCppHashLineFilenameComment(IDLoc);
-
    // Allow an integer followed by a ':' as a directional local label.
    if (Lexer.is(AsmToken::Integer)) {
      LocalLabelVal = getTok().getIntVal();
@@ -1648,7 +1654,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
        return parseDirectiveIncbin();
      case DK_CODE16:
      case DK_CODE16GCC:
-      return TokError(Twine(IDVal) + " not supported yet");
+      return TokError(Twine(IDVal) +
+                      " not currently supported for this target");
      case DK_REPT:
        return parseDirectiveRept(IDLoc, IDVal);
      case DK_IRP:
@@ -1868,37 +1875,20 @@ AsmParser::parseCurlyBlockScope(SmallVectorImpl<AsmRewrite> &AsmStrRewrites) {
    return true;
  }
  
-/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line
-/// since they may not be able to be tokenized to get to the end of line token.
-void AsmParser::eatToEndOfLine() {
-  if (!Lexer.is(AsmToken::EndOfStatement))
-    Lexer.LexUntilEndOfLine();
-  // Eat EOL.
-  Lex();
-}
-
  /// parseCppHashLineFilenameComment as this:
  ///   ::= # number "filename"
-/// or just as a full line comment if it doesn't have a number and a string.
  bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
    Lex(); // Eat the hash token.
-
-  if (getLexer().isNot(AsmToken::Integer)) {
-    // Consume the line since in cases it is not a well-formed line directive,
-    // as if were simply a full line comment.
-    eatToEndOfLine();
-    return false;
-  }
-
+  // The lexer only ever emits HashDirective if it is fully formed and it has
+  // done the checking already, so this is an internal error.
+  assert(getTok().is(AsmToken::Integer) &&
+         "Lexing Cpp line comment: Expected Integer");
    int64_t LineNumber = getTok().getIntVal();
    Lex();
-
-  if (getLexer().isNot(AsmToken::String)) {
-    eatToEndOfLine();
-    return false;
-  }
-
+  assert(getTok().is(AsmToken::String) &&
+         "Lexing Cpp line comment: Expected String");
    StringRef Filename = getTok().getString();
+  Lex();
    // Get rid of the enclosing quotes.
    Filename = Filename.substr(1, Filename.size() - 2);
  
@@ -1907,9 +1897,6 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
    CppHashInfo.Filename = Filename;
    CppHashInfo.LineNumber = LineNumber;
    CppHashInfo.Buf = CurBuffer;
-
-  // Ignore any trailing characters, they're just comment.
-  eatToEndOfLine();
    return false;
  }
  
@@ -2268,7 +2255,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
            break;
  
        if (FAI >= NParameters) {
-    assert(M && "expected macro to be defined");
+        assert(M && "expected macro to be defined");
          Error(IDLoc,
                "parameter named '" + FA.Name + "' does not exist for macro '" +
                M->Name + "'");
@@ -2426,7 +2413,7 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
      // Construct the joined identifier and consume the token.
      Res =
          StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
-    Lexer.Lex(); // Lexer's Lex guarantees consecutive token
+    Lex(); // Parser Lex to maintain invariants.
      return false;
    }
  
@@ -2568,16 +2555,16 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
  
    if (Lexer.isNot(AsmToken::Comma))
      return TokError("expected comma");
-  Lexer.Lex();
+  Lex();
  
    if (Lexer.isNot(AsmToken::Identifier))
      return TokError("expected relocation name");
    SMLoc NameLoc = Lexer.getTok().getLoc();
    StringRef Name = Lexer.getTok().getIdentifier();
-  Lexer.Lex();
+  Lex();
  
    if (Lexer.is(AsmToken::Comma)) {
-    Lexer.Lex();
+    Lex();
      SMLoc ExprLoc = Lexer.getLoc();
      if (parseExpression(Expr))
        return true;
@@ -5250,10 +5237,9 @@ static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
  bool parseAssignmentExpression(StringRef Name, bool allow_redef,
                                 MCAsmParser &Parser, MCSymbol *&Sym,
                                 const MCExpr *&Value) {
-  MCAsmLexer &Lexer = Parser.getLexer();
  
    // FIXME: Use better location, we should use proper tokens.
-  SMLoc EqualLoc = Lexer.getLoc();
+  SMLoc EqualLoc = Parser.getTok().getLoc();
  
    if (Parser.parseExpression(Value)) {
      Parser.TokError("missing expression");
@@ -5265,7 +5251,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
    // a = b
    // b = c
  
-  if (Lexer.isNot(AsmToken::EndOfStatement))
+  if (Parser.getTok().isNot(AsmToken::EndOfStatement))
      return Parser.TokError("unexpected token in assignment");
  
    // Eat the end of statement marker.
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp

index 281c3fab61c3d8da994d80531be4381c3efc982b..b3b0dffb6c9965260a4e8f8bd31fff54e91df85f 100644 (file)
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -10072,7 +10072,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
  
    StringRef Arch = Parser.getTok().getString();
    SMLoc ArchLoc = Parser.getTok().getLoc();
-  getLexer().Lex();
+  Lex();
  
    unsigned ID = ARM::parseArch(Arch);
  
@@ -10195,7 +10195,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
  
    StringRef Name = Parser.getTok().getString();
    SMLoc ExtLoc = Parser.getTok().getLoc();
-  getLexer().Lex();
+  Lex();
  
    bool EnableFeature = true;
    if (Name.startswith_lower("no")) {
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp

index 67efa206a40666ebc3fc782d7b1c3ef877787d14..985215ac3850426a14cdcd5148f1590d8787bb7e 100644 (file)
--- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -729,11 +729,10 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
  
  bool HexagonAsmParser::matchBundleOptions() {
    MCAsmParser &Parser = getParser();
-  MCAsmLexer &Lexer = getLexer();
    while (true) {
      if (!Parser.getTok().is(AsmToken::Colon))
        return false;
-    Lexer.Lex();
+    Lex();
      StringRef Option = Parser.getTok().getString();
      if (Option.compare_lower("endloop0") == 0)
        HexagonMCInstrInfo::setInnerLoop(MCB);
@@ -745,7 +744,7 @@ bool HexagonAsmParser::matchBundleOptions() {
        HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
      else
        return true;
-    Lexer.Lex();
+    Lex();
    }
  }
  
@@ -1105,7 +1104,7 @@ bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
    AsmToken const &Token = getParser().getTok();
    StringRef String = Token.getString();
    SMLoc Loc = Token.getLoc();
-  getLexer().Lex();
+  Lex();
    do {
      std::pair<StringRef, StringRef> HeadTail = String.split('.');
      if (!HeadTail.first.empty())
@@ -1297,7 +1296,7 @@ bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
    static char const * Comma = ",";
    do {
      Tokens.emplace_back (Lexer.getTok());
-    Lexer.Lex();
+    Lex();
      switch (Tokens.back().getKind())
      {
      case AsmToken::TokenKind::Hash:
@@ -1346,7 +1345,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
      AsmToken const &Token = Parser.getTok();
      switch (Token.getKind()) {
      case AsmToken::EndOfStatement: {
-      Lexer.Lex();
+      Lex();
        return false;
      }
      case AsmToken::LCurly: {
@@ -1354,19 +1353,19 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
          return true;
        Operands.push_back(
            HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
-      Lexer.Lex();
+      Lex();
        return false;
      }
      case AsmToken::RCurly: {
        if (Operands.empty()) {
          Operands.push_back(
              HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
-        Lexer.Lex();
+        Lex();
        }
        return false;
      }
      case AsmToken::Comma: {
-      Lexer.Lex();
+      Lex();
        continue;
      }
      case AsmToken::EqualEqual:
@@ -1379,7 +1378,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
            Token.getString().substr(0, 1), Token.getLoc()));
        Operands.push_back(HexagonOperand::CreateToken(
            Token.getString().substr(1, 1), Token.getLoc()));
-      Lexer.Lex();
+      Lex();
        continue;
      }
      case AsmToken::Hash: {
@@ -1389,12 +1388,12 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
        if (!ImplicitExpression)
          Operands.push_back(
            HexagonOperand::CreateToken(Token.getString(), Token.getLoc()));
-      Lexer.Lex();
+      Lex();
        bool MustExtend = false;
        bool HiOnly = false;
        bool LoOnly = false;
        if (Lexer.is(AsmToken::Hash)) {
-        Lexer.Lex();
+        Lex();
          MustExtend = true;
        } else if (ImplicitExpression)
          MustNotExtend = true;
@@ -1412,7 +1411,7 @@ bool HexagonAsmParser::parseInstruction(OperandVector &Operands) {
              HiOnly = false;
              LoOnly = false;
            } else {
-            Lexer.Lex();
+            Lex();
            }
          }
        }
diff --git a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp

index 8742cc9b85a70e533097a23bd3bc04567c76e288..799f587ff3f648b494c110037f92d232a9051d34 100644 (file)
--- a/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -1122,7 +1122,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo &Info,
    // Parse until end of statement, consuming commas between operands
    while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.is(AsmToken::Comma)) {
      // Consume comma token
-    Lexer.Lex();
+    Lex();
  
      // Parse next operand
      if (parseOperand(&Operands, Mnemonic) != MatchOperand_Success)
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp

index da48422b46e10a66ee3ecc4f1cd2e9c9451bea47..4181775fc6dabca05e297214c25322ac9a54d743 100644 (file)
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -1710,7 +1710,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
    while (getLexer().isNot(AsmToken::EndOfStatement) &&
           getLexer().is(AsmToken::Comma)) {
      // Consume the comma token
-    getLexer().Lex();
+    Lex();
  
      // Parse the next operand
      if (ParseOperand(Operands))
diff --git a/test/MC/AsmParser/hash-directive.s b/test/MC/AsmParser/hash-directive.s

new file mode 100644 (file)

index 0000000..4c242fd
--- /dev/null
+++ b/test/MC/AsmParser/hash-directive.s
@@ -0,0 +1,23 @@
+# RUN: not llvm-mc -triple i386-unknown-unknown %s 2>&1 | FileCheck %s
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+# 3 "FILE1" 1 #<- This is a CPP Hash w/ comment
+error
+# CHECK: FILE1:3:1: error
+# 0 "" 2 #<- This is too
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+ # 1 "FILE2" 2 #<- This is a comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+nop; # 6 "FILE3" 2 #<- This is a still comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
+nop;# 6 "FILE4" 2
+  nop;
+error
+# CHECK: FILE4:7:1: error
+# 0 "" 2
+/*comment*/# 6 "FILE5" 2 #<- This is a comment
+error
+# CHECK: hash-directive.s:[[@LINE-1]]:1: error
author	Nirav Dave <niravd@google.com>
	Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)
committer	Nirav Dave <niravd@google.com>
	Fri, 17 Jun 2016 16:06:17 +0000 (16:06 +0000)
include/llvm/MC/MCParser/AsmLexer.h		patch \| blob \| history
include/llvm/MC/MCParser/MCAsmLexer.h		patch \| blob \| history
lib/MC/MCParser/AsmLexer.cpp		patch \| blob \| history
lib/MC/MCParser/AsmParser.cpp		patch \| blob \| history
lib/Target/ARM/AsmParser/ARMAsmParser.cpp		patch \| blob \| history
lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp		patch \| blob \| history
lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp		patch \| blob \| history
lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp		patch \| blob \| history
test/MC/AsmParser/hash-directive.s	[new file with mode: 0644]	patch \| blob