From: Eli Friedman Date: Fri, 30 Sep 2011 01:13:51 +0000 (+0000) Subject: Some fixes for MS-style asm parsing: specifically, add some error checking, and handl... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3fedbe1f71c18fba01d39109d606f421a0103a2a;p=clang Some fixes for MS-style asm parsing: specifically, add some error checking, and handle asm comments using semicolons correctly. (The comments are actually surprisingly tricky.) git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@140837 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/clang/Parse/Parser.h b/include/clang/Parse/Parser.h index 3056819b3a..73e157bf31 100644 --- a/include/clang/Parse/Parser.h +++ b/include/clang/Parse/Parser.h @@ -1409,7 +1409,7 @@ private: StmtResult ParseBreakStatement(ParsedAttributes &Attr); StmtResult ParseReturnStatement(ParsedAttributes &Attr); StmtResult ParseAsmStatement(bool &msAsm); - StmtResult FuzzyParseMicrosoftAsmStatement(SourceLocation AsmLoc); + StmtResult ParseMicrosoftAsmStatement(SourceLocation AsmLoc); bool ParseMicrosoftIfExistsCondition(bool& Result); void ParseMicrosoftIfExistsStatement(StmtVector &Stmts); void ParseMicrosoftIfExistsExternalDeclaration(); diff --git a/lib/Parse/ParseStmt.cpp b/lib/Parse/ParseStmt.cpp index 58514b0467..489ceab950 100644 --- a/lib/Parse/ParseStmt.cpp +++ b/lib/Parse/ParseStmt.cpp @@ -1572,30 +1572,105 @@ StmtResult Parser::ParseReturnStatement(ParsedAttributes &attrs) { return Actions.ActOnReturnStmt(ReturnLoc, R.take()); } -/// FuzzyParseMicrosoftAsmStatement. When -fms-extensions is enabled, this -/// routine is called to skip/ignore tokens that comprise the MS asm statement. 
-StmtResult Parser::FuzzyParseMicrosoftAsmStatement(SourceLocation AsmLoc) { - SourceLocation EndLoc; - if (Tok.is(tok::l_brace)) { - unsigned short savedBraceCount = BraceCount; - do { - EndLoc = Tok.getLocation(); - ConsumeAnyToken(); - } while (BraceCount > savedBraceCount && Tok.isNot(tok::eof)); - } else { - // From the MS website: If used without braces, the __asm keyword means - // that the rest of the line is an assembly-language statement. - SourceManager &SrcMgr = PP.getSourceManager(); +/// ParseMicrosoftAsmStatement. When -fms-extensions/-fasm-blocks is enabled, +/// this routine is called to collect the tokens for an MS asm statement. +StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { + SourceManager &SrcMgr = PP.getSourceManager(); + SourceLocation EndLoc = AsmLoc; + do { + bool InBraces = false; + unsigned short savedBraceCount; + bool InAsmComment = false; + FileID FID; + unsigned LineNo; + unsigned NumTokensRead = 0; + SourceLocation LBraceLoc; + + if (Tok.is(tok::l_brace)) { + // Braced inline asm: consume the opening brace. + InBraces = true; + savedBraceCount = BraceCount; + EndLoc = LBraceLoc = ConsumeBrace(); + ++NumTokensRead; + } else { + // Single-line inline asm; compute which line it is on. + std::pair<FileID, unsigned> ExpAsmLoc = + SrcMgr.getDecomposedExpansionLoc(EndLoc); + FID = ExpAsmLoc.first; + LineNo = SrcMgr.getLineNumber(FID, ExpAsmLoc.second); + } + SourceLocation TokLoc = Tok.getLocation(); - unsigned LineNo = SrcMgr.getExpansionLineNumber(TokLoc); do { + // If we hit EOF, we're done, period. + if (Tok.is(tok::eof)) + break; + // When we consume the closing brace, we're done. + if (InBraces && BraceCount == savedBraceCount) + break; + + if (!InAsmComment && Tok.is(tok::semi)) { + // A semicolon in an asm is the start of a comment. + InAsmComment = true; + if (InBraces) { + // Compute which line the comment is on. 
+ std::pair<FileID, unsigned> ExpSemiLoc = + SrcMgr.getDecomposedExpansionLoc(TokLoc); + FID = ExpSemiLoc.first; + LineNo = SrcMgr.getLineNumber(FID, ExpSemiLoc.second); + } + } else if (!InBraces || InAsmComment) { + // If end-of-line is significant, check whether this token is on a + // new line. + std::pair<FileID, unsigned> ExpLoc = + SrcMgr.getDecomposedExpansionLoc(TokLoc); + if (ExpLoc.first != FID || + SrcMgr.getLineNumber(ExpLoc.first, ExpLoc.second) != LineNo) { + // If this is a single-line __asm, we're done. + if (!InBraces) + break; + // We're no longer in a comment. + InAsmComment = false; + } else if (!InAsmComment && Tok.is(tok::r_brace)) { + // Single-line asm always ends when a closing brace is seen. + // FIXME: This is compatible with Apple gcc's -fasm-blocks; what + // does MSVC do here? + break; + } + } + + // Consume the next token; make sure we don't modify the brace count etc. + // if we are in a comment. EndLoc = TokLoc; - ConsumeAnyToken(); + if (InAsmComment) + PP.Lex(Tok); + else + ConsumeAnyToken(); TokLoc = Tok.getLocation(); - } while ((SrcMgr.getExpansionLineNumber(TokLoc) == LineNo) && - Tok.isNot(tok::r_brace) && Tok.isNot(tok::semi) && - Tok.isNot(tok::eof)); - } + ++NumTokensRead; + } while (1); + + if (InBraces && BraceCount != savedBraceCount) { + // __asm without closing brace (this can happen at EOF). + Diag(Tok, diag::err_expected_rbrace); + Diag(LBraceLoc, diag::note_matching) << "{"; + return StmtError(); + } else if (NumTokensRead == 0) { + // Empty __asm. + Diag(Tok, diag::err_expected_lbrace); + return StmtError(); + } + // Multiple adjacent asm's form together into a single asm statement + // in the AST. + if (!Tok.is(tok::kw_asm)) + break; + EndLoc = ConsumeToken(); + } while (1); + // FIXME: Need to actually grab the data and pass it on to Sema. 
Ideally, + // what Sema wants is a string of the entire inline asm, with one instruction + // per line and all the __asm keywords stripped out, and a way of mapping + // from any character of that string to its location in the original source + // code. I'm not entirely sure how to go about that, though. Token t; t.setKind(tok::string_literal); t.setLiteralData("\"/*FIXME: not done*/\""); @@ -1631,12 +1706,16 @@ StmtResult Parser::FuzzyParseMicrosoftAsmStatement(SourceLocation AsmLoc) { /// asm-clobbers ',' asm-string-literal /// /// [MS] ms-asm-statement: -/// '__asm' assembly-instruction ';'[opt] -/// '__asm' '{' assembly-instruction-list '}' ';'[opt] +/// ms-asm-block +/// ms-asm-block ms-asm-statement +/// +/// [MS] ms-asm-block: +/// '__asm' ms-asm-line '\n' +/// '__asm' '{' ms-asm-instruction-block[opt] '}' ';'[opt] /// -/// [MS] assembly-instruction-list: -/// assembly-instruction ';'[opt] -/// assembly-instruction-list ';' assembly-instruction ';'[opt] +/// [MS] ms-asm-instruction-block +/// ms-asm-line +/// ms-asm-line '\n' ms-asm-instruction-block /// StmtResult Parser::ParseAsmStatement(bool &msAsm) { assert(Tok.is(tok::kw_asm) && "Not an asm stmt"); @@ -1644,7 +1723,7 @@ StmtResult Parser::ParseAsmStatement(bool &msAsm) { if (getLang().MicrosoftExt && Tok.isNot(tok::l_paren) && !isTypeQualifier()) { msAsm = true; - return FuzzyParseMicrosoftAsmStatement(AsmLoc); + return ParseMicrosoftAsmStatement(AsmLoc); } DeclSpec DS(AttrFactory); SourceLocation Loc = Tok.getLocation(); diff --git a/test/Parser/ms-inline-asm.c b/test/Parser/ms-inline-asm.c new file mode 100644 index 0000000000..b1af23e472 --- /dev/null +++ b/test/Parser/ms-inline-asm.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -verify -fms-extensions + +#define M __asm int 0x2c +#define M2 int + +void t1(void) { M } +void t2(void) { __asm int 0x2c } +void t3(void) { __asm M2 0x2c } +void* t4(void) { __asm mov eax, fs:[0x10] } +void t5() { + __asm { + int 0x2c ; } asm comments are fun! 
}{ + } + __asm {} +} +int t6() { + __asm int 3 ; } comments for single-line asm + __asm {} + + __asm int 4 + return 10; +} +int t7() { // expected-note {{to match this}} + __asm + __asm { // expected-error 3 {{expected}} expected-note {{to match this}} diff --git a/test/Sema/ms-fuzzy-asm.c b/test/Sema/ms-fuzzy-asm.c deleted file mode 100644 index 250e322256..0000000000 --- a/test/Sema/ms-fuzzy-asm.c +++ /dev/null @@ -1,9 +0,0 @@ -// RUN: %clang_cc1 %s -verify -fms-extensions - -#define M __asm int 0x2c -#define M2 int - -void t1(void) { M } -void t2(void) { __asm int 0x2c } -void t3(void) { __asm M2 0x2c } -void* t4(void) { __asm mov eax, fs:[0x10] }