From: Andrew V. Tischenko Date: Mon, 16 Oct 2017 11:14:29 +0000 (+0000) Subject: This patch is a result of D37262: The issues with X86 prefixes. It closes PR7709... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ddb14f8532dc5b64fa4b83d54eb3625077751be7;p=llvm This patch is a result of D37262: The issues with X86 prefixes. It closes PR7709, PR17697, PR19251, PR32809 and PR21640. There could be other bugs closed by this patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315899 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h index 9bf440ea96d..db28fd0fd6d 100644 --- a/include/llvm/MC/MCInst.h +++ b/include/llvm/MC/MCInst.h @@ -160,6 +160,10 @@ class MCInst { unsigned Opcode = 0; SMLoc Loc; SmallVector Operands; + // These flags could be used to pass some info from one target subcomponent + // to another, for example, from disassembler to asm printer. The values of + // the flags have any sense on target level only (e.g. prefixes on x86). + unsigned Flags = 0; public: MCInst() = default; @@ -167,6 +171,9 @@ public: void setOpcode(unsigned Op) { Opcode = Op; } unsigned getOpcode() const { return Opcode; } + void setFlags(unsigned F) { Flags = F; } + unsigned getFlags() const { return Flags; } + void setLoc(SMLoc loc) { Loc = loc; } SMLoc getLoc() const { return Loc; } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 385bbbe60a2..6985d356af6 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2330,7 +2330,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); // Determine whether this is an instruction prefix. // FIXME: @@ -2340,22 +2339,48 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // lock addq %rax, %rbx ; Destination operand must be of memory type // xacquire ; xacquire must be accompanied by 'lock' bool isPrefix = StringSwitch(Name) - .Cases("lock", - "rep", "repe", - "repz", "repne", - "repnz", "rex64", - "data32", "data16", true) - .Cases("xacquire", "xrelease", true) - .Cases("acquire", "release", isParsingIntelSyntax()) - .Default(false); + .Cases("rex64", "data32", "data16", true) + .Cases("xacquire", "xrelease", true) + .Cases("acquire", "release", isParsingIntelSyntax()) + .Default(false); + + auto isLockRepeatPrefix = [](StringRef N) { + return StringSwitch(N) + .Cases("lock", "rep", "repe", "repz", "repne", "repnz", true) + .Default(false); + }; bool CurlyAsEndOfStatement = false; + + unsigned Flags = X86::IP_NO_PREFIX; + while (isLockRepeatPrefix(Name.lower())) { + unsigned Prefix = + StringSwitch(Name) + .Cases("lock", "lock", X86::IP_HAS_LOCK) + .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT) + .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE) + .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible) + Flags |= Prefix; + Name = Parser.getTok().getString(); + Parser.Lex(); // eat the prefix + // Hack: we could have something like + // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl" + while (Name.startswith(";") || Name.startswith("\n") || + Name.startswith("\t") or Name.startswith("/")) { + Name = Parser.getTok().getString(); + Parser.Lex(); // go to next prefix or instr + } + } + + if (Flags) + PatchedName = Name; + Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc)); + // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as // the next one. if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) { - // Parse '*' modifier. if (getLexer().is(AsmToken::Star)) Operands.push_back(X86Operand::CreateToken("*", consumeToken())); @@ -2593,6 +2618,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + if (Flags) + Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc)); return false; } @@ -2660,6 +2687,16 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo, return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); } +static unsigned getPrefixes(OperandVector &Operands) { + unsigned Result = 0; + X86Operand &Prefix = static_cast(*Operands.back()); + if (Prefix.isPrefix()) { + Result = Prefix.getPrefix(); + Operands.pop_back(); + } + return Result; +} + bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2674,8 +2711,13 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); bool WasOriginallyInvalidOperand = false; + unsigned Prefixes = getPrefixes(Operands); + MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // First, try a direct match. switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax())) { @@ -2840,12 +2882,16 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, StringRef Mnemonic = Op.getToken(); SMRange EmptyRange = None; StringRef Base = Op.getToken(); + unsigned Prefixes = getPrefixes(Operands); // First, handle aliases that expand to multiple instructions. MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm); MCInst Inst; + if (Prefixes) + Inst.setFlags(Prefixes); + // Find one unsized memory operand, if present. X86Operand *UnsizedMemOp = nullptr; for (const auto &Op : Operands) { diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index 0fba15cc692..f80f8e37664 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -28,12 +28,7 @@ namespace llvm { /// X86Operand - Instances of this class represent a parsed X86 machine /// instruction. struct X86Operand : public MCParsedAsmOperand { - enum KindTy { - Token, - Register, - Immediate, - Memory - } Kind; + enum KindTy { Token, Register, Immediate, Memory, Prefix } Kind; SMLoc StartLoc, EndLoc; SMLoc OffsetOfLoc; @@ -50,6 +45,10 @@ struct X86Operand : public MCParsedAsmOperand { unsigned RegNo; }; + struct PrefOp { + unsigned Prefixes; + }; + struct ImmOp { const MCExpr *Val; }; @@ -73,6 +72,7 @@ struct X86Operand : public MCParsedAsmOperand { struct RegOp Reg; struct ImmOp Imm; struct MemOp Mem; + struct PrefOp Pref; }; X86Operand(KindTy K, SMLoc Start, SMLoc End) @@ -111,6 +111,11 @@ struct X86Operand : public MCParsedAsmOperand { return Reg.RegNo; } + unsigned getPrefix() const { + assert(Kind == Prefix && "Invalid access!"); + return Pref.Prefixes; + } + const MCExpr *getImm() const { assert(Kind == Immediate && "Invalid access!"); return Imm.Val; @@ -387,6 +392,7 @@ struct X86Operand : public MCParsedAsmOperand { return isMemOffs() && Mem.ModeSize == 64 && (!Mem.Size || Mem.Size == 64); } + bool isPrefix() const { return Kind == Prefix; } bool isReg() const override { return Kind == Register; } bool isGR32orGR64() const { @@ -509,6 +515,13 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } + static std::unique_ptr + CreatePrefix(unsigned Prefixes, SMLoc StartLoc, SMLoc EndLoc) { + auto Res = llvm::make_unique(Prefix, StartLoc, EndLoc); + Res->Pref.Prefixes = Prefixes; + return Res; + } + static std::unique_ptr CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc) { auto Res = llvm::make_unique(Immediate, StartLoc, EndLoc); diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4ce908b1da6..60c56241088 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -74,6 +74,7 @@ // //===----------------------------------------------------------------------===// +#include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86DisassemblerDecoder.h" #include "llvm/MC/MCContext.h" @@ -232,7 +233,24 @@ MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction( return Fail; } else { Size = InternalInstr.length; - return (!translateInstruction(Instr, InternalInstr, this)) ? Success : Fail; + bool Ret = translateInstruction(Instr, InternalInstr, this); + if (!Ret) { + unsigned Flags = X86::IP_NO_PREFIX; + if (InternalInstr.hasAdSize) + Flags |= X86::IP_HAS_AD_SIZE; + if (!InternalInstr.mandatoryPrefix) { + if (InternalInstr.hasOpSize) + Flags |= X86::IP_HAS_OP_SIZE; + if (InternalInstr.repeatPrefix == 0xf2) + Flags |= X86::IP_HAS_REPEAT_NE; + else if (InternalInstr.repeatPrefix == 0xf3 && + // It should not be 'pause' f3 90 + InternalInstr.opcode != 0x90) + Flags |= X86::IP_HAS_REPEAT; + } + Instr.setFlags(Flags); + } + return (!Ret) ? Success : Fail; } } @@ -315,12 +333,12 @@ static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) { unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::RSI; + baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::SI : X86::ESI; + baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::ESI : X86::SI; + baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); @@ -340,12 +358,12 @@ static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) { unsigned baseRegNo; if (insn.mode == MODE_64BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::RDI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI; else if (insn.mode == MODE_32BIT) - baseRegNo = insn.prefixPresent[0x67] ? X86::DI : X86::EDI; + baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI; else { assert(insn.mode == MODE_16BIT); - baseRegNo = insn.prefixPresent[0x67] ? X86::EDI : X86::DI; + baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI; } MCOperand baseReg = MCOperand::createReg(baseRegNo); mcInst.addOperand(baseReg); diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 577b7a776c6..8524464764f 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -277,38 +277,44 @@ static void dbgprintf(struct InternalInstruction* insn, insn->dlog(insn->dlogArg, buffer); } -/* - * setPrefixPresent - Marks that a particular prefix is present at a particular - * location. - * - * @param insn - The instruction to be marked as having the prefix. - * @param prefix - The prefix that is present. - * @param location - The location where the prefix is located (in the address - * space of the instruction's reader). - */ -static void setPrefixPresent(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - insn->prefixPresent[prefix] = 1; - insn->prefixLocations[prefix] = location; +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { + if (insn->mode == MODE_64BIT) + return prefix >= 0x40 && prefix <= 0x4f; + return false; } /* - * isPrefixAtLocation - Queries an instruction to determine whether a prefix is - * present at a given location. + * setPrefixPresent - Marks that a particular prefix is present as mandatory * - * @param insn - The instruction to be queried. - * @param prefix - The prefix. - * @param location - The location to query. - * @return - Whether the prefix is at that location. + * @param insn - The instruction to be marked as having the prefix. + * @param prefix - The prefix that is present. */ -static bool isPrefixAtLocation(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ - return insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location; +static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix) { + uint8_t nextByte; + switch (prefix) { + case 0xf2: + case 0xf3: + if (lookAtByte(insn, &nextByte)) + break; + // TODO: + // 1. There could be several 0x66 + // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then + // it's not mandatory prefix + // 3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need + // 0x0f exactly after it to be mandatory prefix + if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) + // The last of 0xf2 /0xf3 is mandatory prefix + insn->mandatoryPrefix = prefix; + insn->repeatPrefix = prefix; + break; + case 0x66: + if (lookAtByte(insn, &nextByte)) + break; + // 0x66 can't overwrite existing mandatory prefix and should be ignored + if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte))) + insn->mandatoryPrefix = prefix; + break; + } } /* @@ -322,19 +328,12 @@ static bool isPrefixAtLocation(struct InternalInstruction* insn, */ static int readPrefixes(struct InternalInstruction* insn) { bool isPrefix = true; - bool prefixGroups[4] = { false }; - uint64_t prefixLocation; uint8_t byte = 0; uint8_t nextByte; - bool hasAdSize = false; - bool hasOpSize = false; - dbgprintf(insn, "readPrefixes()"); while (isPrefix) { - prefixLocation = insn->readerCursor; - /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ if (consumeByte(insn, &byte)) break; @@ -343,13 +342,10 @@ static int readPrefixes(struct InternalInstruction* insn) { * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -357,39 +353,41 @@ static int readPrefixes(struct InternalInstruction* insn) { * - it is followed by an xchg instruction * then it should be disassembled as a xacquire/xrelease not repne/rep. */ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) || - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + if (((nextByte == 0xf0) || + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) { insn->xAcquireRelease = true; + if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support + break; + } /* * Also if the byte is 0xf3, and the following condition is met: * - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or * "mov mem, imm" (opcode 0xc6/0xc7) instructions. * then it should be disassembled as an xrelease not rep. */ - if (byte == 0xf3 && - (nextByte == 0x88 || nextByte == 0x89 || - nextByte == 0xc6 || nextByte == 0xc7)) + if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) { insn->xAcquireRelease = true; - if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { - if (consumeByte(insn, &nextByte)) + if (nextByte != 0x90) // PAUSE instruction support + break; + } + if (isREX(insn, nextByte)) { + uint8_t nnextByte; + // Go to REX prefix after the current one + if (consumeByte(insn, &nnextByte)) return -1; - if (lookAtByte(insn, &nextByte)) + // We should be able to read next byte after REX prefix + if (lookAtByte(insn, &nnextByte)) return -1; unconsumeByte(insn); } - if (nextByte != 0x0f && nextByte != 0x90) - break; } switch (byte) { case 0xf0: /* LOCK */ case 0xf2: /* REPNE/REPNZ */ case 0xf3: /* REP or REPE/REPZ */ - if (prefixGroups[0]) - dbgprintf(insn, "Redundant Group 1 prefix"); - prefixGroups[0] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x2e: /* CS segment override -OR- Branch not taken */ case 0x36: /* SS segment override -OR- Branch taken */ @@ -420,24 +418,15 @@ static int readPrefixes(struct InternalInstruction* insn) { debug("Unhandled override"); return -1; } - if (prefixGroups[1]) - dbgprintf(insn, "Redundant Group 2 prefix"); - prefixGroups[1] = true; - setPrefixPresent(insn, byte, prefixLocation); + setPrefixPresent(insn, byte); break; case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = true; - hasOpSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasOpSize = true; + setPrefixPresent(insn, byte); break; case 0x67: /* Address-size override */ - if (prefixGroups[3]) - dbgprintf(insn, "Redundant Group 4 prefix"); - prefixGroups[3] = true; - hasAdSize = true; - setPrefixPresent(insn, byte, prefixLocation); + insn->hasAdSize = true; + setPrefixPresent(insn, byte); break; default: /* Not a prefix byte */ isPrefix = false; @@ -469,7 +458,6 @@ static int readPrefixes(struct InternalInstruction* insn) { } else { unconsumeByte(insn); /* unconsume byte1 */ unconsumeByte(insn); /* unconsume byte */ - insn->necessaryPrefixLocation = insn->readerCursor - 2; } if (insn->vectorExtensionType == TYPE_EVEX) { @@ -505,13 +493,10 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_3B; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_VEX_3B) { insn->vectorExtensionPrefix[0] = byte; @@ -520,13 +505,12 @@ static int readPrefixes(struct InternalInstruction* insn) { /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); - } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], @@ -540,26 +524,24 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) insn->vectorExtensionType = TYPE_VEX_2B; - } else { + else unconsumeByte(insn); - } if (insn->vectorExtensionType == TYPE_VEX_2B) { insn->vectorExtensionPrefix[0] = byte; consumeByte(insn, &insn->vectorExtensionPrefix[1]); - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); - } switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -575,13 +557,10 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if ((byte1 & 0x38) != 0x0) { /* 0 in these 3 bits is a POP instruction. */ + if ((byte1 & 0x38) != 0x0) /* 0 in these 3 bits is a POP instruction. */ insn->vectorExtensionType = TYPE_XOP; - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } else { + else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } if (insn->vectorExtensionType == TYPE_XOP) { insn->vectorExtensionPrefix[0] = byte; @@ -590,19 +569,18 @@ static int readPrefixes(struct InternalInstruction* insn) { /* We simulate the REX prefix for simplicity's sake */ - if (insn->mode == MODE_64BIT) { + if (insn->mode == MODE_64BIT) insn->rexPrefix = 0x40 | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); - } switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { default: break; case VEX_PREFIX_66: - hasOpSize = true; + insn->hasOpSize = true; break; } @@ -610,51 +588,35 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], insn->vectorExtensionPrefix[2]); } - } else { - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; - - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } - } + } else if (isREX(insn, byte)) { + if (lookAtByte(insn, &nextByte)) + return -1; + insn->rexPrefix = byte; + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else + unconsumeByte(insn); if (insn->mode == MODE_16BIT) { - insn->registerSize = (hasOpSize ? 4 : 2); - insn->addressSize = (hasAdSize ? 4 : 2); - insn->displacementSize = (hasAdSize ? 4 : 2); - insn->immediateSize = (hasOpSize ? 4 : 2); + insn->registerSize = (insn->hasOpSize ? 4 : 2); + insn->addressSize = (insn->hasAdSize ? 4 : 2); + insn->displacementSize = (insn->hasAdSize ? 4 : 2); + insn->immediateSize = (insn->hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 2 : 4); - insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 2 : 4); + insn->displacementSize = (insn->hasAdSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { insn->registerSize = 8; - insn->addressSize = (hasAdSize ? 4 : 8); + insn->addressSize = (insn->hasAdSize ? 4 : 8); insn->displacementSize = 4; insn->immediateSize = 4; } else { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (insn->hasOpSize ? 2 : 4); + insn->addressSize = (insn->hasAdSize ? 4 : 8); + insn->displacementSize = (insn->hasOpSize ? 2 : 4); + insn->immediateSize = (insn->hasOpSize ? 2 : 4); } } @@ -758,7 +720,10 @@ static int readOpcode(struct InternalInstruction* insn) { insn->opcodeType = TWOBYTE; } - } + } else if (insn->mandatoryPrefix) + // The opcode with mandatory prefix must start with opcode escape. + // If not it's legacy repeat prefix + insn->mandatoryPrefix = 0; /* * At this point we have consumed the full opcode. @@ -950,15 +915,38 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { } else { return -1; } - } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + } else if (!insn->mandatoryPrefix) { + // If we don't have mandatory prefix we should use legacy prefixes here + if (insn->hasOpSize && (insn->mode != MODE_16BIT)) attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + if (insn->opcodeType == ONEBYTE) { + if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90)) + // Special support for PAUSE + attrMask |= ATTR_XS; + } else { + if (insn->repeatPrefix == 0xf2) + attrMask |= ATTR_XD; + else if (insn->repeatPrefix == 0xf3) + attrMask |= ATTR_XS; + } + } else { + switch (insn->mandatoryPrefix) { + case 0xf2: attrMask |= ATTR_XD; + break; + case 0xf3: + attrMask |= ATTR_XS; + break; + case 0x66: + if (insn->mode != MODE_16BIT) + attrMask |= ATTR_OPSIZE; + break; + case 0x67: + attrMask |= ATTR_ADSIZE; + break; + } } if (insn->rexPrefix & 0x08) @@ -977,8 +965,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes */ - if (insn->mode == MODE_64BIT && - isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + if ((insn->mode == MODE_64BIT) && insn->hasOpSize) { switch (insn->opcode) { case 0xE8: case 0xE9: @@ -1058,9 +1045,9 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { */ if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) { /* Make sure we observed the prefixes in any position. */ - if (insn->prefixPresent[0x67]) + if (insn->hasAdSize) attrMask |= ATTR_ADSIZE; - if (insn->prefixPresent[0x66]) + if (insn->hasOpSize) attrMask |= ATTR_OPSIZE; /* In 16-bit, invert the attributes. */ @@ -1075,7 +1062,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { return 0; } - if ((insn->mode == MODE_16BIT || insn->prefixPresent[0x66]) && + if ((insn->mode == MODE_16BIT || insn->hasOpSize) && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -1108,7 +1095,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg); if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) && - (insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) { + (insn->mode == MODE_16BIT) ^ insn->hasOpSize) { insn->instructionID = instructionIDWithOpsize; insn->spec = specifierForUID(instructionIDWithOpsize); } else { diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index b07fd0b17d3..eeef407366c 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -546,24 +546,26 @@ struct InternalInstruction { // Prefix state - // 1 if the prefix byte corresponding to the entry is present; 0 if not - uint8_t prefixPresent[0x100]; - // contains the location (for use with the reader) of the prefix byte - uint64_t prefixLocations[0x100]; + // The possible mandatory prefix + uint8_t mandatoryPrefix; // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; // The type of the vector extension prefix VectorExtensionType vectorExtensionType; // The value of the REX prefix, if present uint8_t rexPrefix; - // The location where a mandatory prefix would have to be (i.e., right before - // the opcode, or right before the REX prefix if one is present). - uint64_t necessaryPrefixLocation; // The segment override type SegmentOverride segmentOverride; // 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease bool xAcquireRelease; + // Address-size override + bool hasAdSize; + // Operand-size override + bool hasOpSize; + // The repeat prefix if any + uint8_t repeatPrefix; + // Sizes of various critical pieces of data, in bytes uint8_t registerSize; uint8_t addressSize; diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 4d91300c7ed..6ff1136cd85 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -50,8 +50,16 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, HasCustomInstComment = EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); + unsigned Flags = MI->getFlags(); if (TSFlags & X86II::LOCK) OS << "\tlock\t"; + if (!(TSFlags & X86II::LOCK) && Flags & X86::IP_HAS_LOCK) + OS << "\tlock\n"; + + if (Flags & X86::IP_HAS_REPEAT_NE) + OS << "\trepne\n"; + else if (Flags & X86::IP_HAS_REPEAT) + OS << "\trep\n"; // Output CALLpcrel32 as "callq" in 64-bit mode. // In Intel annotation it's always emitted as "call". diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 72593878e44..464941a1bab 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -43,6 +43,12 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, if (TSFlags & X86II::LOCK) OS << "\tlock\n"; + unsigned Flags = MI->getFlags(); + if (Flags & X86::IP_HAS_REPEAT_NE) + OS << "\trepne\n"; + else if (Flags & X86::IP_HAS_REPEAT) + OS << "\trep\n"; + printInstruction(MI, OS); // Next always print the annotation. diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index d8953da4abb..7c6444ba58a 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -51,6 +51,16 @@ namespace X86 { TO_ZERO = 3, CUR_DIRECTION = 4 }; + + /// The constants to describe instr prefixes if there are + enum IPREFIXES { + IP_NO_PREFIX = 0, + IP_HAS_OP_SIZE = 1, + IP_HAS_AD_SIZE = 2, + IP_HAS_REPEAT_NE = 4, + IP_HAS_REPEAT = 8, + IP_HAS_LOCK = 16 + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index d12d2b689b3..272c6f23014 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1108,7 +1108,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0x66, CurByte, OS); // Emit the LOCK opcode prefix. - if (TSFlags & X86II::LOCK) + if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK) EmitByte(0xF0, CurByte, OS); switch (TSFlags & X86II::OpPrefixMask) { @@ -1159,6 +1159,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned Opcode = MI.getOpcode(); const MCInstrDesc &Desc = MCII.get(Opcode); uint64_t TSFlags = Desc.TSFlags; + unsigned Flags = MI.getFlags(); // Pseudo instructions don't get encoded. if ((TSFlags & X86II::FormMask) == X86II::Pseudo) @@ -1194,8 +1195,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, MI, OS); // Emit the repeat opcode prefix as needed. - if (TSFlags & X86II::REP) + if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT) EmitByte(0xF3, CurByte, OS); + if (Flags & X86::IP_HAS_REPEAT_NE) + EmitByte(0xF2, CurByte, OS); // Emit the address size opcode prefix as needed. bool need_address_override; diff --git a/test/MC/Disassembler/X86/prefixes.txt b/test/MC/Disassembler/X86/prefixes.txt index 9e002fab465..983e09670d6 100644 --- a/test/MC/Disassembler/X86/prefixes.txt +++ b/test/MC/Disassembler/X86/prefixes.txt @@ -1,5 +1,60 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%rdi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%rdi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%rsi), %es:(%rdi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%rsi), %es:(%rdi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%rsi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%rsi), %dx +0xf3 0x6f #rep outs +# CHECK: rep +# CHECK-NEXT: lodsb (%rsi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%rsi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%rdi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%rdi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf2 0xaf #repne scas + # CHECK: lock # CHECK-NEXT: orl $16, %fs:776 0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10 @@ -50,7 +105,6 @@ # Test that multiple redundant prefixes work (redundant, but valid x86). # CHECK: rep -# CHECK-NEXT: rep # CHECK-NEXT: stosq 0xf3 0xf3 0x48 0xab diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt index 5a09550a708..bf73338ed4f 100644 --- a/test/MC/Disassembler/X86/x86-32.txt +++ b/test/MC/Disassembler/X86/x86-32.txt @@ -797,3 +797,14 @@ # CHECK: nopw %ax 0x66 0x0f 0x1f 0xc0 + +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x66 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%si) +0x2e 0x67 0x89 0x1c +# CHECK: movl %ebx, %cs:(%esi,%ebp) +0x2e 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%si) +0x2e 0x67 0x66 0x89 0x1c +# CHECK: movw %bx, %cs:(%si) +0x2e 0x66 0x67 0x89 0x1c diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt index dbfff0aed9b..6061e311556 100644 --- a/test/MC/Disassembler/X86/x86-64.txt +++ b/test/MC/Disassembler/X86/x86-64.txt @@ -486,3 +486,18 @@ # CHECK: nopq %rax 0x48 0x0f 0x1f 0xC0 + +# CHECK: xchgw %di, %ax +0x66 0x3e 0x97 + +# CHECK: movw %bx, %cs:(%rsi,%rbp) +0x2e 0x66 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%esi,%ebp) +0x2e 0x67 0x89 0x1c 0x2e +# CHECK: movl %ebx, %cs:(%rsi,%rbp) +0x2e 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x67 0x66 0x89 0x1c 0x2e +# CHECK: movw %bx, %cs:(%esi,%ebp) +0x2e 0x66 0x67 0x89 0x1c 0x2e + diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s index e15f6470cf1..aedd74447d6 100644 --- a/test/MC/X86/intel-syntax-encoding.s +++ b/test/MC/X86/intel-syntax-encoding.s @@ -54,12 +54,10 @@ acquire lock add [rax], rax // CHECK: encoding: [0xf2] -// CHECK: encoding: [0xf0] -// CHECK: encoding: [0x48,0x01,0x00] +// CHECK: encoding: [0xf0,0x48,0x01,0x00] release lock add [rax], rax // CHECK: encoding: [0xf3] -// CHECK: encoding: [0xf0] -// CHECK: encoding: [0x48,0x01,0x00] +// CHECK: encoding: [0xf0,0x48,0x01,0x00] // CHECK: encoding: [0x9c] // CHECK: encoding: [0x9d] diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index 1fe17831e7e..d697428a2c0 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -902,56 +902,48 @@ lock/incl 1(%rsp) lock addq %rsi, (%rdi) // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: addq %rsi, (%rdi) -// CHECK: encoding: [0x48,0x01,0x37] +// CHECK: encoding: [0xf0,0x48,0x01,0x37] lock subq %rsi, (%rdi) // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: subq %rsi, (%rdi) -// CHECK: encoding: [0x48,0x29,0x37] +// CHECK: encoding: [0xf0,0x48,0x29,0x37] lock andq %rsi, (%rdi) // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: andq %rsi, (%rdi) -// CHECK: encoding: [0x48,0x21,0x37] +// CHECK: encoding: [0xf0,0x48,0x21,0x37] lock orq %rsi, (%rdi) // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: orq %rsi, (%rdi) -// CHECK: encoding: [0x48,0x09,0x37] +// CHECK: encoding: [0xf0,0x48,0x09,0x37] lock xorq %rsi, (%rdi) // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: xorq %rsi, (%rdi) -// CHECK: encoding: [0x48,0x31,0x37] +// CHECK: encoding: [0xf0,0x48,0x31,0x37] xacquire lock addq %rax, (%rax) // CHECK: xacquire // CHECK: encoding: [0xf2] // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: addq %rax, (%rax) -// CHECK: encoding: [0x48,0x01,0x00] +// CHECK: encoding: [0xf0,0x48,0x01,0x00] xrelease lock addq %rax, (%rax) // CHECK: xrelease // CHECK: encoding: [0xf3] // CHECK: lock -// CHECK: encoding: [0xf0] // CHECK: addq %rax, (%rax) -// CHECK: encoding: [0x48,0x01,0x00] +// CHECK: encoding: [0xf0,0x48,0x01,0x00] // rdar://8033482 rep movsl // CHECK: rep -// CHECK: encoding: [0xf3] // CHECK: movsl -// CHECK: encoding: [0xa5] +// CHECK: encoding: [0xf3,0xa5] // rdar://8403974