From: David L. Jones Date: Thu, 16 May 2019 23:27:07 +0000 (+0000) Subject: [X86][AsmParser] Ignore "short" even harder in Intel syntax ASM. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2752aa2b3c1ae437ea3d6a353400a256c7ea86b0;p=llvm [X86][AsmParser] Ignore "short" even harder in Intel syntax ASM. In Intel syntax, it's not uncommon to see a "short" modifier on Jcc conditional jumps, which indicates the offset should be a "short jump" (8-bit immediate offset from EIP, -128 to +127). This patch expands to all recognized Jcc condition codes, and removes the inline restriction. Clang already ignores "jmp short" in inline assembly. However, only "jmp" and a couple of Jcc are actually checked, and only inline (i.e., not when using the integrated assembler for asm sources). A quick search through asm-containing libraries at hand shows a pretty broad range of Jcc conditions spelled with "short." GAS ignores the "short" modifier, and instead uses an encoding based on the given immediate. MS inline seems to do the same, and I suspect MASM does, too. NASM will yield an error if presented with an out-of-range immediate value. Example of GCC 9.1 and MSVC v19.20, "jmp short" with offsets that do and do not fit within 8 bits: https://gcc.godbolt.org/z/aFZmjY Differential Revision: https://reviews.llvm.org/D61990 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360954 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 2ab979a7b76..6239cfc3cdf 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -848,6 +848,8 @@ private: const SMLoc &StartLoc, SMLoc &EndLoc); + X86::CondCode ParseConditionCode(StringRef CCode); + bool ParseIntelMemoryOperandSize(unsigned &Size); std::unique_ptr CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, @@ -2005,6 +2007,29 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { } } +// X86::COND_INVALID if not a recognized condition code or alternate mnemonic, +// otherwise the EFLAGS Condition Code enumerator. +X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) { + return StringSwitch(CC) + .Case("o", X86::COND_O) // Overflow + .Case("no", X86::COND_NO) // No Overflow + .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal + .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below + .Cases("e", "z", X86::COND_E) // Equal/Zero + .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero + .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above + .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal + .Case("s", X86::COND_S) // Sign + .Case("ns", X86::COND_NS) // No Sign + .Cases("p", "pe", X86::COND_P) // Parity/Parity Even + .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd + .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal + .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less + .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater + .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal + .Default(X86::COND_INVALID); +} + // true on failure, false otherwise // If no {z} mark was found - Parser doesn't advance bool X86AsmParser::ParseZ(std::unique_ptr &Z, @@ -2354,16 +2379,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, StringRef PatchedName = Name; - if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) && - isParsingIntelSyntax() && isParsingInlineAsm()) { + // Hack to skip "short" following Jcc. + if (isParsingIntelSyntax() && + (PatchedName == "jmp" || + (PatchedName.startswith("j") && + ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) { StringRef NextTok = Parser.getTok().getString(); if (NextTok == "short") { SMLoc NameEndLoc = NameLoc.getFromPointer(NameLoc.getPointer() + Name.size()); - // Eat the short keyword + // Eat the short keyword. Parser.Lex(); - // MS ignores the short keyword, it determines the jmp type based - // on the distance of the label + // MS and GAS ignore the short keyword; they both determine the jmp type + // based on the distance of the label. (NASM does emit different code with + // and without "short," though.) InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc, NextTok.size() + 1); } diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s index 171357e1f52..b11402c44e9 100644 --- a/test/MC/X86/intel-syntax.s +++ b/test/MC/X86/intel-syntax.s @@ -64,6 +64,10 @@ main: jmp FWORD ptr [rax] // CHECK: ljmpq *(%rax) ljmp [rax] +// CHECK: jmp _foo + jmp short _foo +// CHECK: jp _foo + jpe short _foo // CHECK: movl $257, -4(%rsp) mov DWORD PTR [RSP - 4], 257