typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
SmallVector<ICToken, 4> PostfixStack;
-
+
public:
int64_t popOperand() {
assert (!PostfixStack.empty() && "Popped an empty stack!");
"Unexpected operand!");
PostfixStack.push_back(std::make_pair(Op, Val));
}
-
+
void popOperator() { InfixOperatorStack.pop_back(); }
void pushOperator(InfixCalculatorTok Op) {
// Push the new operator if the stack is empty.
InfixOperatorStack.push_back(Op);
return;
}
-
+
// Push the new operator if it has a higher precedence than the operator
// on the top of the stack or the operator on the top of the stack is a
// left parenthesis.
InfixOperatorStack.push_back(Op);
return;
}
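// A brief worked example (illustrative only): for the infix input
// "2 + 3 * 4", '+' is stacked first; when '*' arrives it has higher
// precedence, so it is pushed on top of '+' rather than popping it.
// The postfix stack therefore ends up as "2 3 4 * +", which evaluates
// the multiplication before the addition, as expected.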
-
+
// The operator on the top of the stack has higher precedence than the
// new operator.
unsigned ParenCount = 0;
// Nothing to process.
if (InfixOperatorStack.empty())
break;
-
+
Idx = InfixOperatorStack.size() - 1;
StackOp = InfixOperatorStack[Idx];
if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
break;
-
+
// If we have an even parenthesis count and we see a left parenthesis,
// then stop processing.
if (!ParenCount && StackOp == IC_LPAREN)
break;
-
+
if (StackOp == IC_RPAREN) {
++ParenCount;
InfixOperatorStack.pop_back();
if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
PostfixStack.push_back(std::make_pair(StackOp, 0));
}
-
+
if (PostfixStack.empty())
return 0;
-
+
SmallVector<ICToken, 16> OperandStack;
for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
ICToken Op = PostfixStack[i];
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
AddImmPrefix(addimmprefix) { Info.clear(); }
-
+
unsigned getBaseReg() { return BaseReg; }
unsigned getIndexReg() { return IndexReg; }
unsigned getScale() { return Scale; }
(*I).Kind = AOK_Delete;
}
const char *SymLocPtr = SymName.data();
- // Skip everything before the symbol.
+ // Skip everything before the symbol.
if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
assert(Len > 0 && "Expected a positive length.");
AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
// identifier. Don't try to parse it as a register.
if (Tok.getString().startswith("."))
break;
-
+
// If we're parsing an immediate expression, we don't expect a '['.
if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
break;
MCSymbol *Sym =
getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- const MCExpr *Val =
+ const MCExpr *Val =
MCSymbolRefExpr::Create(Sym, Variant, getContext());
if (IDVal == "b" && Sym->isUndefined())
return Error(Loc, "invalid reference to undefined symbol");
const MCExpr *NewDisp;
if (ParseIntelDotOperator(Disp, NewDisp))
return nullptr;
-
+
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
Disp = NewDisp;
#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));
-namespace llvm {
-
+namespace llvm {
+
// Fill-ins to make the compiler happy. These constants are never actually
// assigned; they are just filler to make an automatically-generated switch
// statement work.
static void logger(void* arg, const char* log) {
if (!arg)
return;
-
+
raw_ostream &vStream = *(static_cast<raw_ostream*>(arg));
vStream << log << "\n";
-}
-
+}
+
//
// Public interface for the disassembler
//
}
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
-/// immediate Value in the MCInst.
+/// immediate Value in the MCInst.
///
/// @param Value - The immediate Value, has had any PC adjustment made by
/// the caller.
/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
/// called then that function is called to get any symbolic information for the
/// immediate in the instruction using the Address, Offset and Width. If that
-/// returns non-zero then the symbolic information it returns is used to create
+/// returns non-zero then the symbolic information it returns is used to create
/// an MCExpr and that is added as an operand to the MCInst. If getOpInfo()
/// returns zero and isBranch is true then a symbol look up for immediate Value
/// is done and if a symbol is found an MCExpr is created with that, else
/// if it adds an operand to the MCInst and false otherwise.
static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
uint64_t Address, uint64_t Offset,
- uint64_t Width, MCInst &MI,
- const MCDisassembler *Dis) {
+ uint64_t Width, MCInst &MI,
+ const MCDisassembler *Dis) {
return Dis->tryAddingSymbolicOperand(MI, Value, Address, isBranch,
Offset, Width);
}
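// A minimal sketch of the intended call pattern (the InternalInstruction
// field names below are assumed for illustration): a caller such as
// translateImmediate() first offers the value to the symbolizer and only
// falls back to a plain immediate operand if nothing symbolic was attached.
//
//   if (!tryAddingSymbolicOperand(immediate, isBranch, insn.startLocation,
//                                 insn.immediateOffset, insn.immediateSize,
//                                 mcInst, Dis))
//     mcInst.addOperand(MCOperand::CreateImm(immediate));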
/// These can often be addresses in a literal pool. The Address of the
/// instruction and its immediate Value are used to determine the address
/// being referenced in the literal pool entry. The SymbolLookUp call back will
-/// return a pointer to a literal 'C' string if the referenced address is an
+/// return a pointer to a literal 'C' string if the referenced address is an
/// address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
const void *Decoder) {
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
const OperandSpecifier &operand,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
// Sign-extend the immediate if necessary.
OperandType type = (OperandType)operand.type;
debug("A R/M register operand may not have a SIB byte");
return true;
}
-
+
switch (insn.eaBase) {
default:
debug("Unexpected EA base register");
ALL_REGS
#undef ENTRY
}
-
+
return false;
}
/// from.
/// @return - false on success; true otherwise
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
// Addresses in an MCInst are represented as five operands:
- // 1. basereg (register) The R/M base, or (if there is a SIB) the
+ // 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
- // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
+ // 2. scaleamount (immediate) 1, or (if there is a SIB) the specified
// scale amount
// 3. indexreg (register) x86_registerNONE, or (if there is a SIB)
- // the index (which is multiplied by the
+ // the index (which is multiplied by the
// scale amount)
// 4. displacement (immediate) 0, or the displacement if there is one
// 5. segmentreg (register) x86_registerNONE for now, but could be set
// if we have segment overrides
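// As a concrete sketch of that layout, a simple address such as
// [ebx + 4*ecx + 8] with no segment override corresponds to the operand
// sequence:
//   MCOperand::CreateReg(X86::EBX)   // 1. basereg
//   MCOperand::CreateImm(4)          // 2. scaleamount
//   MCOperand::CreateReg(X86::ECX)   // 3. indexreg
//   MCOperand::CreateImm(8)          // 4. displacement
//   MCOperand::CreateReg(0)          // 5. segmentreg (no override)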
-
+
MCOperand baseReg;
MCOperand scaleAmount;
MCOperand indexReg;
MCOperand displacement;
MCOperand segmentReg;
uint64_t pcrel = 0;
-
+
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
if (insn.sibBase != SIB_BASE_NONE) {
switch (insn.sibBase) {
(insn.addressSize == 8 ? SIB_INDEX_RAX:SIB_INDEX_EAX);
SIBIndex IndexBase = IndexIs512 ? SIB_INDEX_ZMM0 :
IndexIs256 ? SIB_INDEX_YMM0 : SIB_INDEX_XMM0;
- insn.sibIndex = (SIBIndex)(IndexBase +
+ insn.sibIndex = (SIBIndex)(IndexBase +
(insn.sibIndex == SIB_INDEX_NONE ? 4 : IndexOffset));
}
} else {
indexReg = MCOperand::CreateReg(0);
}
-
+
scaleAmount = MCOperand::CreateImm(insn.sibScale);
} else {
switch (insn.eaBase) {
}
else
baseReg = MCOperand::CreateReg(0);
-
+
indexReg = MCOperand::CreateReg(0);
break;
case EA_BASE_BX_SI:
// placeholders to keep the compiler happy.
#define ENTRY(x) \
case EA_BASE_##x: \
- baseReg = MCOperand::CreateReg(X86::x); break;
+ baseReg = MCOperand::CreateReg(X86::x); break;
ALL_EA_BASES
#undef ENTRY
#define ENTRY(x) case EA_REG_##x:
return true;
}
}
-
+
scaleAmount = MCOperand::CreateImm(1);
}
-
+
displacement = MCOperand::CreateImm(insn.displacement);
segmentReg = MCOperand::CreateReg(segmentRegnums[insn.segmentOverride]);
-
+
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
/// from.
/// @return - false on success; true otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn, const MCDisassembler *Dis) {
+ InternalInstruction &insn, const MCDisassembler *Dis) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
return translateRMMemory(mcInst, insn, Dis);
}
}
-
+
/// translateFPRegister - Translates a stack position on the FPU stack to its
/// LLVM form, and appends it to an MCInst.
///
return false;
}
-/// translateOperand - Translates an operand stored in an internal instruction
+/// translateOperand - Translates an operand stored in an internal instruction
/// to LLVM's format and appends it to an MCInst.
///
/// @param mcInst - The MCInst to append to.
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
insn, Dis);
}
}
-
+
/// translateInstruction - Translates an internal instruction and all its
/// operands to an MCInst.
///
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
InternalInstruction &insn,
- const MCDisassembler *Dis) {
+ const MCDisassembler *Dis) {
if (!insn.spec) {
debug("Instruction has no specification");
return true;
}
-
+
mcInst.setOpcode(insn.instructionID);
// If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
// prefix bytes should be disassembled as xrelease and xacquire then set the
else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
mcInst.setOpcode(X86::XACQUIRE_PREFIX);
}
-
+
insn.numImmediatesTranslated = 0;
-
+
for (const auto &Op : insn.operands) {
if (Op.encoding != ENCODING_NONE) {
if (translateOperand(mcInst, Op, insn, Dis)) {
}
}
}
-
+
return false;
}
return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
}
-extern "C" void LLVMInitializeX86Disassembler() {
+extern "C" void LLVMInitializeX86Disassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
createX86Disassembler);
TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
createX86Disassembler);
int64_t Address;
if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
O << formatHex((uint64_t)Address);
- }
- else {
+ } else {
// Otherwise, just print the expression.
O << *Op.getExpr();
}
void printopaquemem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
-
+
void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
private:
bool HasCustomInstComment;
};
-
+
}
#endif
const MCOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
const MCOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
const MCOperand &SegReg = MI->getOperand(Op+X86::AddrSegmentReg);
-
+
// If this has a segment register, print it.
if (SegReg.getReg()) {
printOperand(MI, Op+X86::AddrSegmentReg, O);
O << ':';
}
-
+
O << '[';
-
+
bool NeedPlus = false;
if (BaseReg.getReg()) {
printOperand(MI, Op+X86::AddrBaseReg, O);
NeedPlus = true;
}
-
+
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
if (ScaleVal != 1)
O << formatImm(DispVal);
}
}
-
+
O << ']';
}
O << "opaque ptr ";
printMemReference(MI, OpNo, O);
}
-
+
void printi8mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "byte ptr ";
printMemReference(MI, OpNo, O);
printMemOffset(MI, OpNo, O);
}
};
-
+
}
#endif
// Defines a new offset for the CFA. E.g.
//
// With frame:
- //
+ //
// pushq %rbp
// L0:
// .cfi_def_cfa_offset 16
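// (After the push the CFA sits 16 bytes above %rsp: 8 bytes for the
// return address plus 8 for the just-saved %rbp, hence the offset of 16.)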
(RegNo > X86::ZMM15 && RegNo <= X86::ZMM31));
}
-
+
inline bool isX86_64NonExtLowByteReg(unsigned reg) {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
// MemAddr, src1(VEX_4V), src2(ModR/M)
// MemAddr, src1(ModR/M), imm8
//
- if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
if (X86II::isX86_64ExtendedReg(MI.getOperand(MemOperand +
EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3;
}
EncodeRC = true;
- }
+ }
break;
case X86II::MRMDestReg:
// MRMDestReg instructions forms:
enum {
X86_64 = 0, X86_32_DarwinEH = 1, X86_32_Generic = 2
};
-}
-
+}
+
/// N86 namespace - Native X86 register numbers
///
namespace N86 {
if (A->isUndefined() || B->isUndefined()) {
StringRef Name = A->isUndefined() ? A->getName() : B->getName();
Asm.getContext().FatalError(Fixup.getLoc(),
- "unsupported relocation with subtraction expression, symbol '" +
+ "unsupported relocation with subtraction expression, symbol '" +
Name + "' can not be undefined in a subtraction expression");
}
if (A_Base) {
Index = A_Base->getIndex();
IsExtern = 1;
- }
- else {
+ } else {
Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
if (B_Base) {
Index = B_Base->getIndex();
IsExtern = 1;
- }
- else {
+ } else {
Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
}
Target llvm::TheX86_32Target, llvm::TheX86_64Target;
-extern "C" void LLVMInitializeX86TargetInfo() {
+extern "C" void LLVMInitializeX86TargetInfo() {
RegisterTarget<Triple::x86, /*HasJIT=*/true>
X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem,
FeatureSlowSHLD]>;
// be good for modern chips without enabling instruction set encodings past the
// basic SSE2 and 64-bit ones. It disables slow things from any mainstream and
// modern 64-bit x86 chip, and enables features that are generally beneficial.
-//
+//
// We currently use the Sandy Bridge model as the default scheduling model as
// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
// covers a huge swath of x86 processors. If there are specific scheduling
unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned X86MaterializeGV(const GlobalValue *GV,MVT VT);
+ unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
unsigned fastMaterializeConstant(const Constant *C) override;
unsigned fastMaterializeAlloca(const AllocaInst *C) override;
// Ok, we need to do a load from a stub. If we've already loaded from
// this stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
unsigned LoadReg;
if (I != LocalValueMap.end() && I->second != 0) {
LoadReg = I->second;
case Instruction::Alloca: {
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
- DenseMap<const AllocaInst*, int>::iterator SI =
+ DenseMap<const AllocaInst *, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(A);
if (SI != FuncInfo.StaticAllocaMap.end()) {
AM.BaseType = X86AddressMode::FrameIndexBase;
unsigned Alignment = S->getAlignment();
unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
+ if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = ABIAlignment;
bool Aligned = Alignment >= ABIAlignment;
// Make the copy.
unsigned DstReg = VA.getLocReg();
- const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
if (!SrcRC->contains(DstReg))
return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- DstReg).addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
// Add register to return instruction.
RetRegs.push_back(VA.getLocReg());
assert(Reg &&
"SRetReturnReg should have been set in LowerFormalArguments()!");
unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- RetReg).addReg(Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
RetRegs.push_back(RetReg);
}
// Now emit the RET.
MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
return true;
}
-
bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
TII.get(X86::MOV32r0), Zero32);
// Copy the zero into the appropriate sub/super/identical physical
- // register. Unfortunately the operations needed are not uniform enough to
- // fit neatly into the table above.
+ // register. Unfortunately the operations needed are not uniform enough
+ // to fit neatly into the table above.
if (VT.SimpleTy == MVT::i16) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
EVT CmpVT = TLI.getValueType(CmpLHS->getType());
// Emit a compare of the LHS and RHS, setting the flags.
if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT))
- return false;
+ return false;
if (SETFOpc) {
unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
if (I->getType() != CI->getOperand(0)->getType() ||
!((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
- (Subtarget->hasSSE2() && RetVT == MVT::f64) ))
+ (Subtarget->hasSSE2() && RetVT == MVT::f64)))
return false;
const Value *CmpLHS = CI->getOperand(0);
const TargetRegisterClass *CopyRC =
(SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
InputReg = CopyReg;
}
VT = MVT::i32;
else if (Len >= 2)
VT = MVT::i16;
- else {
+ else
VT = MVT::i8;
- }
unsigned Reg;
bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
if (!Subtarget->is64Bit())
return false;
-
+
// Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments.
unsigned GPRCnt = 0;
unsigned FPRCnt = 0;
if (!X86SelectAddress(Ptr, AM))
return false;
- const X86InstrInfo &XII = (const X86InstrInfo&)TII;
+ const X86InstrInfo &XII = (const X86InstrInfo &)TII;
unsigned Size = DL.getTypeAllocSize(LI->getType());
unsigned Alignment = LI->getAlignment();
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMacho());
-
+
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
// If required, include space for extra hidden slot for stashing base pointer.
- if (X86FI->getRestoreBasePointer())
+ if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;
if (RegInfo->needsStackRealignment(MF)) {
// Callee-saved registers are pushed on stack before the stack
int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
- const uint64_t StackSize = MFI->getStackSize();
+ const uint64_t StackSize = MFI->getStackSize();
{
#ifndef NDEBUG
const X86RegisterInfo *RegInfo =
// refer to arguments to the function which are stored in the *callers*
// frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
// AND FixedObjects IFF needsStackRealignment or hasVarSizedObject.
-
+
assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
// We don't handle tail calls, and shouldn't be seeing them
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
- SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
+ SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
// Bypass expensive divides on Atom when compiling with O2.
if (TM.getOptLevel() >= CodeGenOpt::Default) {
- if (Subtarget->hasSlowDivide32())
+ if (Subtarget->hasSlowDivide32())
addBypassSlowDiv(32, 8);
if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
addBypassSlowDiv(64, 16);
ValToCopy = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), ValToCopy);
assert(VA.getLocInfo() != CCValAssign::FPExt &&
- "Unexpected FP-extend for return value.");
+ "Unexpected FP-extend for return value.");
// If this is x86-64, and we disabled SSE, we can't return FP values,
// or SSE or MMX vectors.
// In PIC we need an extra register to formulate the address computation
// for the callee.
unsigned MaxInRegs =
- (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
+ (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
if (Zeroable[i])
continue;
-
+
SDValue Current = Op->getOperand(i);
SDValue SrcVector = Current->getOperand(0);
if (!V1.getNode())
AllContants = false;
NonConstIdx = idx;
NumNonConsts++;
- }
- else {
+ } else {
NumConsts++;
if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
MVT::getIntegerVT(VT.getSizeInBits()));
DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
}
- else
+ else
DstVec = DAG.getUNDEF(VT);
return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
Op.getOperand(NonConstIdx),
/// \brief Return true if \p N implements a horizontal binop and return the
/// operands for the horizontal binop into V0 and V1.
-///
+///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function checks that the build_vector \p N in input implements a
/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
"Invalid Vector in input!");
-
+
bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
bool CanFold = true;
unsigned ExpectedVExtractIdx = BaseIdx;
}
/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
-/// a concat_vector.
+/// a concat_vector.
///
/// This is a helper function of PerformBUILD_VECTORCombine.
/// This function expects two 256-bit vectors called V0 and V1.
/// At first, each vector is split into two separate 128-bit vectors.
/// Then, the resulting 128-bit vectors are used to implement two
-/// horizontal binary operations.
+/// horizontal binary operations.
///
/// The kind of horizontal binary operation is defined by \p X86Opcode.
///
// Try to match an SSE3 float HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
-
+
if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
} else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) {
// Try to match an SSSE3 integer HADD/HSUB.
if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
-
+
if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
}
-
+
if (!Subtarget->hasAVX())
return SDValue();
// Do this only if the target has AVX2.
if (Subtarget->hasAVX2())
return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
-
+
// Do not try to expand this build_vector into a pair of horizontal
// add/sub if we can emit a pair of scalar add/sub.
if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
/// does not check for the profitability of lowering either as PALIGNR or
/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
/// This matches shuffle vectors that look like:
-///
+///
/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
-///
+///
/// Essentially it concatenates V1 and V2, shifts right by some number of
/// elements, and takes the low elements as the result. Note that while this is
/// specified as a *right shift* because x86 is little-endian, it is a *left
/// Insert one bit to mask vector, like v16i1 or v8i1.
/// AVX-512 feature.
-SDValue
+SDValue
X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
// insert element and then truncate the result.
MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
- SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
}
return SDValue();
}
-
+
assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
"Unknown SINT_TO_FP to lower!");
In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
InVT = ExtVT;
}
-
+
SDValue Cst = DAG.getTargetConstant(1, InVT.getVectorElementType());
const Constant *C = (dyn_cast<ConstantSDNode>(Cst))->getConstantIntValue();
SDValue CP = DAG.getConstantPool(C, getPointerTy());
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
}
-
+
unsigned EltBits = EltVT.getSizeInBits();
LLVMContext *Context = DAG.getContext();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(BitOp, dl, VecVT, Operand, MaskCasted));
}
-
+
// If not vector, then scalar.
unsigned BitOp = IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;
if (Op0.getValueType() == MVT::i1)
llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
}
-
+
if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 ||
Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) {
- // Do the comparison at i32 if it's smaller, besides the Atom case.
- // This avoids subregister aliasing issues. Keep the smaller reference
- // if we're optimizing for size, however, as that'll allow better folding
+ // Do the comparison at i32 if it's smaller, besides the Atom case.
+ // This avoids subregister aliasing issues. Keep the smaller reference
+ // if we're optimizing for size, however, as that'll allow better folding
// of memory operations.
if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 &&
!DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
return SDValue();
EVT VT = Op.getValueType();
-
+
// SSE1 has rsqrtss and rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
// significant digits in the divisor.
if (!Subtarget->useReciprocalEst())
return SDValue();
-
+
EVT VT = Op.getValueType();
-
+
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// It is likely not profitable to do this for f64 because a double-precision
((Subtarget->hasDQI() && Subtarget->hasVLX() &&
VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
-
+
((Subtarget->hasDQI() && VT.is512BitVector() &&
VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
-
+
unsigned int NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16)
return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
RoundingMode),
Mask, Src0, Subtarget, DAG);
- }
+ }
case INTR_TYPE_2OP_MASK: {
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
Op.getOperand(2)),
Op.getOperand(4), Op.getOperand(3), Subtarget, DAG);
- }
+ }
case CMP_MASK:
case CMP_MASK_CC: {
// Comparison intrinsics with masks.
switch(IntrData->Type) {
default:
llvm_unreachable("Unknown Intrinsic Type");
- break;
+ break;
case RDSEED:
case RDRAND: {
// Emit the node with the right value type.
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
// Do this only if the vector shift count is a constant build_vector.
- if (Op.getOpcode() == ISD::SHL &&
+ if (Op.getOpcode() == ISD::SHL &&
(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget->hasInt256() && VT == MVT::v16i16)) &&
ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
CanBeSimplified = Amt2 == Amt->getOperand(j);
}
}
-
+
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
// Replace this node with two shifts followed by a MOVSS/MOVSD.
EVT CastVT = MVT::v4i32;
- SDValue Splat1 =
+ SDValue Splat1 =
DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
- SDValue Splat2 =
+ SDValue Splat2 =
DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
if (TargetOpcode == X86ISD::MOVSD)
.addReg(restoreDstReg).addMBB(restoreMBB);
// restoreMBB:
- if (RegInfo->hasBasePointer(*MF)) {
+ if (RegInfo->hasBasePointer(*MF)) {
const X86Subtarget &STI = MF->getTarget().getSubtarget<X86Subtarget>();
const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
// Replace 213-type (isel default) FMA3 instructions with 231-type for
// accumulator loops. Writing back to the accumulator allows the coalescer
-// to remove extra copies in the loop.
+// to remove extra copies in the loop.
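// For example (operand semantics assumed here for illustration): an update of
// the form acc = a * b + acc matches the 231 form, dst = src2 * src3 + dst,
// so the result lands back in the accumulator register; with the default 213
// form, dst = src2 * dst + src3, the result would land in a non-accumulator
// register and a copy back into the accumulator would remain in the loop.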
MachineBasicBlock *
X86TargetLowering::emitFMA3Instr(MachineInstr *MI,
MachineBasicBlock *MBB) const {
EVT SVT = BC0.getValueType();
unsigned Opcode = BC0.getOpcode();
unsigned NumElts = VT.getVectorNumElements();
-
+
if (BC0.hasOneUse() && SVT.isVector() &&
SVT.getVectorNumElements() * 2 == NumElts &&
TLI.isOperationLegal(Opcode, VT)) {
// fold (blend A, B, allOnes) -> B
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return Op1;
-
+
// Simplify the case where the mask is a constant i32 value.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
if (C->isNullValue())
// "load" ports instead of the dedicated "store" port.
// E.g., on Haswell:
// vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
- // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
+ // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
if (isLegalAddressingMode(AM, Ty))
// Scale represents reg2 * scale, thus account for 1
// as soon as we use a second register.
int NumEltsInVT = !if (!eq (NumElts, 1),
!if (!eq (EltVT.Size, 32), 4,
!if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts);
-
+
string VTName = "v" # NumEltsInVT # EltVT;
// The vector VT.
def Zrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst), (ins SrcRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX, EVEX_V512;
- def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
+ def Zkrr : AVX5128I<opc, MRMSrcReg, (outs VR512:$dst),
(ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
[]>, EVEX, EVEX_V512, EVEX_KZ;
}
defm VPBROADCASTDr : avx512_int_broadcast_reg<0x7C, "vpbroadcastd", GR32, VK16WM>;
defm VPBROADCASTQr : avx512_int_broadcast_reg<0x7C, "vpbroadcastq", GR64, VK8WM>,
VEX_W;
-
+
def : Pat <(v16i32 (X86vzext VK16WM:$mask)),
(VPBROADCASTDrZkrr VK16WM:$mask, (i32 (MOV32ri 0x1)))>;
(OpVT (X86VBroadcast (SrcVT VR128X:$src))))]>, EVEX;
def krr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst), (ins KRC:$mask,
VR128X:$src),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
[(set DstRC:$dst,
(OpVT (X86VBroadcastm KRC:$mask, (SrcVT VR128X:$src))))]>,
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
+ [(set DstRC:$dst,
(OpVT (X86VBroadcast (ld_frag addr:$src))))]>, EVEX;
def krm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst), (ins KRC:$mask,
x86memop:$src),
- !strconcat(OpcodeStr,
+ !strconcat(OpcodeStr,
"\t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
- [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
+ [(set DstRC:$dst, (OpVT (X86VBroadcastm KRC:$mask,
(ld_frag addr:$src))))]>, EVEX, EVEX_KZ;
}
}
(VBROADCASTSSZr VR128X:$src)>;
def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))),
(VBROADCASTSDZr VR128X:$src)>;
-
+
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86VBroadcast FR32X:$src)),
let Predicates = [HasAVX512] in {
def : Pat<(v8i32 (X86VBroadcastm (v8i1 VK8WM:$mask), (loadi32 addr:$src))),
- (EXTRACT_SUBREG
+ (EXTRACT_SUBREG
(v16i32 (VPBROADCASTDZkrm (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
addr:$src)), sub_ymm)>;
}
def Zrr : AVX512XS8I<opc, MRMSrcReg, (outs VR512:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[]>, EVEX, EVEX_V512;
-
+
let Predicates = [HasCDI, HasVLX] in {
def Z128rr : AVX512XS8I<opc, MRMSrcReg, (outs VR128:$dst), (ins KRC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
(VPERMILPDZri VR512:$src1, imm:$imm)>;
// -- VPERM - register form --
-multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
+multiclass avx512_perm<bits<8> opc, string OpcodeStr, RegisterClass RC,
PatFrag mem_frag, X86MemOperand x86memop, ValueType OpVT> {
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
defm VPERMDZ : avx512_perm<0x36, "vpermd", VR512, memopv16i32, i512mem,
v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
-defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
+defm VPERMQZ : avx512_perm<0x36, "vpermq", VR512, memopv8i64, i512mem,
v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
let ExeDomain = SSEPackedSingle in
defm VPERMPSZ : avx512_perm<0x16, "vpermps", VR512, memopv16f32, f512mem,
v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
let ExeDomain = SSEPackedDouble in
-defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
+defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem,
v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
// -- VPERM2I - 3 source operands form --
}
let ExeDomain = SSEPackedSingle in
-defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
+defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps",
VK16WM, VR512, f512mem,
- memopv16f32, vselect, v16f32>,
+ memopv16f32, vselect, v16f32>,
EVEX_CD8<32, CD8VF>, EVEX_V512;
let ExeDomain = SSEPackedDouble in
-defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
+defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd",
VK8WM, VR512, f512mem,
- memopv8f64, vselect, v8f64>,
+ memopv8f64, vselect, v8f64>,
VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
def : Pat<(v16f32 (int_x86_avx512_mask_blend_ps_512 (v16f32 VR512:$src1),
(VBLENDMPDZrr (COPY_TO_REGCLASS GR8:$mask, VK8WM),
VR512:$src1, VR512:$src2)>;
-defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
- VK16WM, VR512, f512mem,
- memopv16i32, vselect, v16i32>,
+defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd",
+ VK16WM, VR512, f512mem,
+ memopv16i32, vselect, v16i32>,
EVEX_CD8<32, CD8VF>, EVEX_V512;
-defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
- VK8WM, VR512, f512mem,
- memopv8i64, vselect, v8i64>,
+defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq",
+ VK8WM, VR512, f512mem,
+ memopv8i64, vselect, v8i64>,
VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512;
def : Pat<(v16i32 (int_x86_avx512_mask_blend_d_512 (v16i32 VR512:$src1),
let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src2))),
- (EXTRACT_SUBREG
- (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (EXTRACT_SUBREG
+ (v16f32 (VBLENDMPSZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src2))),
- (EXTRACT_SUBREG
- (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
+ (EXTRACT_SUBREG
+ (v16i32 (VPBLENDMDZrr (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
FROUND_NO_EXC)),
(COPY_TO_REGCLASS (VCMPPSZrrib VR512:$src1, VR512:$src2,
(I8Imm imm:$cc)), GR16)>;
-
+
def : Pat<(i8 (int_x86_avx512_mask_cmp_pd_512 (v8f64 VR512:$src1),
(v8f64 VR512:$src2), imm:$cc, (i8 -1),
FROUND_NO_EXC)),
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, RegisterClass RC,
+multiclass avx512_move_scalar <string asm, RegisterClass RC,
SDNode OpNode, ValueType vt,
X86MemOperand x86memop, PatFrag mem_pat> {
let hasSideEffects = 0 in {
- def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
+ def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128X:$dst, (vt (OpNode VR128X:$src1,
(scalar_to_vector RC:$src2))))],
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
- (VMOVSSZrr (v4f32 (V_SET0)),
+ (VMOVSSZrr (v4f32 (V_SET0)),
(EXTRACT_SUBREG (v8f32 VR256X:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst, (v2i64 (X86vzmovl
+ [(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
(VMOV64toPQIZrr GR64:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
(VMOVDI2PDIZrr GR32:$src)>;
-
+
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
+ [(set RC:$dst, (OpVT (OpNode (OpVT RC:$src1), (OpVT RC:$src2))))],
IIC_SSE_UNPCK>, EVEX_4V;
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
//
multiclass avx512_pshuf_imm<bits<8> opc, string OpcodeStr, RegisterClass RC,
- SDNode OpNode, PatFrag mem_frag,
+ SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT> {
def ri : AVX512Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
-multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
+multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
SDNode OpNode, ValueType vt> {
def rr : AVX512PI<opc, MRMSrcReg,
- (outs KRC:$dst), (ins RC:$src1, RC:$src2),
+ (outs KRC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))],
SSEPackedInt>, EVEX_4V;
def rm : AVX512PI<opc, MRMSrcMem,
- (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
+ (outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set KRC:$dst, (OpNode (vt RC:$src1),
+ [(set KRC:$dst, (OpNode (vt RC:$src1),
(bitconvert (memop_frag addr:$src2))))], SSEPackedInt>, EVEX_4V;
}
// AVX-512 Shift instructions
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
- string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, i8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, SrcVT, bc_frag, _>, EVEX_V512;
}
-multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
+multiclass avx512_varshift_types<bits<8> opcd, bits<8> opcq, string OpcodeStr,
SDNode OpNode> {
- defm D : avx512_varshift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
- v16i32_info>, EVEX_CD8<32, CD8VQ>;
- defm Q : avx512_varshift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
+ defm D : avx512_varshift_sizes<opcd, OpcodeStr#"d", OpNode, v4i32, bc_v4i32,
+ v16i32_info>, EVEX_CD8<32, CD8VQ>;
+ defm Q : avx512_varshift_sizes<opcq, OpcodeStr#"q", OpNode, v2i64, bc_v2i64,
v8i64_info>, EVEX_CD8<64, CD8VQ>, VEX_W;
}
EVEX_4V;
}
-defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
+defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
i512mem, memopv16i32>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
+defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
i512mem, memopv8i64>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
-defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
+defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
i512mem, memopv16i32>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
+defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
i512mem, memopv8i64>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
-defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
+defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
i512mem, memopv16i32>, EVEX_V512,
EVEX_CD8<32, CD8VF>;
-defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
+defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
i512mem, memopv8i64>, EVEX_V512, VEX_W,
EVEX_CD8<64, CD8VF>;
// AVX-512 - MOVDDUP
//===----------------------------------------------------------------------===//
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
+multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
X86MemOperand x86memop, PatFrag memop_frag> {
def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
// Scalar FMA
let Constraints = "$src1 = $dst" in {
-multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- RegisterClass RC, ValueType OpVT,
- X86MemOperand x86memop, Operand memop,
+multiclass avx512_fma3s_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType OpVT,
+ X86MemOperand x86memop, Operand memop,
PatFrag mem_frag> {
let isCommutable = 1 in
def r : AVX512FMA3<opc, MRMSrcReg, (outs RC:$dst),
def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>,
Requires<[HasAVX512]>;
-multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
- RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
+multiclass avx512_vcvt_fp_with_rc<bits<8> opc, string asm, RegisterClass SrcRC,
+ RegisterClass DstRC, SDNode OpNode, PatFrag mem_frag,
X86MemOperand x86memop, ValueType OpVT, ValueType InVT,
Domain d> {
let hasSideEffects = 0 in {
EVEX_CD8<32, CD8VH>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
-
+
def : Pat<(v8f32 (int_x86_avx512_mask_cvtpd2ps_512 (v8f64 VR512:$src),
(bc_v8f32(v8i32 immAllZerosV)), (i8 -1), (i32 FROUND_CURRENT))),
(VCVTPD2PSZrr VR512:$src)>;
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2DQZ : avx512_vcvt_fp<0xE6, "vcvttpd2dq", VR512, VR256X, fp_to_sint,
- memopv8f64, f512mem, v8i32, v8f64,
+ memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
memopv8f64, f512mem, v8i32, v8f64,
SSEPackedDouble>, EVEX_V512, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
-
+
// cvttpd2udq (src, 0, mask-all-ones, sae-current)
def : Pat<(v8i32 (int_x86_avx512_mask_cvttpd2udq_512 (v8f64 VR512:$src),
(v8i32 immAllZerosV), (i8 -1), FROUND_CURRENT)),
memopv4i64, f256mem, v8f64, v8i32,
SSEPackedDouble>, EVEX_V512, XS,
EVEX_CD8<32, CD8VH>;
-
+
defm VCVTUDQ2PSZ : avx512_vcvt_fp_with_rc<0x7A, "vcvtudq2ps", VR512, VR512, uint_to_fp,
memopv16i32, f512mem, v16f32, v16i32,
SSEPackedSingle>, EVEX_V512, XD,
EVEX_CD8<32, CD8VF>;
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
- (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+ (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
-
+
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
-
+
/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop> {
}
let Predicates = [HasERI], hasSideEffects = 0 in {
-
+
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28>, EVEX, EVEX_V512, T8PD;
defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28>, EVEX, EVEX_V512, T8PD;
defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2>, EVEX, EVEX_V512, T8PD;
(ins VR128X:$src1, VR128X:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F32Int VR128X:$src1, VR128X:$src2))],
itins_s.rr>, XS, EVEX_4V;
let mayLoad = 1 in {
(ins VR128X:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F32Int VR128X:$src1, sse_load_f32:$src2))],
itins_s.rm>, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>;
}
(ins VR128X:$src1, VR128X:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
+ [(set VR128X:$dst,
(F64Int VR128X:$src1, VR128X:$src2))],
itins_s.rr>, XD, EVEX_4V, VEX_W;
let mayLoad = 1 in {
(ins VR128X:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128X:$dst,
- (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
+ [(set VR128X:$dst,
+ (F64Int VR128X:$src1, sse_load_f64:$src2))]>,
XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>;
}
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>;
-defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
- int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
+defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
+ int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
SSE_SQRTSS, SSE_SQRTSD>;
let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
(bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
(VSQRTPDZr VR512:$src1)>;
-
+
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
(ins VR128X:$src1, ssmem:$src2, i32i8imm:$src3),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128X:$dst, (F32Int VR128X:$src1,
+ [(set VR128X:$dst, (F32Int VR128X:$src1,
sse_load_f32:$src2, imm:$src3))]>,
EVEX_CD8<32, CD8VT1>;
defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", ssmem, FR32X,
SSEPackedSingle>, EVEX_CD8<32, CD8VT1>;
-
+
defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", sdmem, FR64X,
SSEPackedDouble>, EVEX_CD8<64, CD8VT1>;
[]>, EVEX, EVEX_K;
}
-defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
+defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
defm VGATHERQPSZ : avx512_gather<0x93, "vgatherqps", VK8WM, VR256X, vz64mem>,
EVEX_V512, EVEX_CD8<32, CD8VT1>;
}
-
+
defm VPGATHERDQZ : avx512_gather<0x90, "vpgatherdq", VK8WM, VR512, vy64xmem>,
EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VPGATHERDDZ : avx512_gather<0x90, "vpgatherdd", VK16WM, VR512, vz32mem>,
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
-
+
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
(bc_v8i64 (v16i32 immAllZerosV)), (i8 -1))),
(VPABSQZrr VR512:$src)>;
-multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
+multiclass avx512_conflict<bits<8> opc, string OpcodeStr,
RegisterClass RC, RegisterClass KRC,
X86MemOperand x86memop,
X86MemOperand x86scalar_mop, string BrdcstStr> {
", ${dst} {${mask}} {z}|${dst} {${mask}} {z}, ${src}",
BrdcstStr, "}"),
[]>, EVEX, EVEX_KZ, EVEX_B;
-
+
let Constraints = "$src1 = $dst" in {
def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, KRC:$mask, RC:$src2),
!strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"),
[(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
}
-
+
multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
string OpcodeStr, Predicate prd> {
let Predicates = [prd] in
defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
HasDQI>, VEX_W;
}
-
+
defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
// When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the
// register class is constrained to GR8_NOREX. This pseudo is explicitly
// marked side-effect free, since it doesn't have an isel pattern like
- // other test instructions.
+ // other test instructions.
let isPseudo = 1, hasSideEffects = 0 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
"", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>;
// FPStack specific DAG Nodes.
//===----------------------------------------------------------------------===//
-def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
+def SDTX86FpGet2 : SDTypeProfile<2, 0, [SDTCisVT<0, f80>,
SDTCisVT<1, f80>]>;
def SDTX86Fld : SDTypeProfile<1, 2, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
+ SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fst : SDTypeProfile<0, 3, [SDTCisFP<0>,
- SDTCisPtrTy<1>,
+ SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
def SDTX86Fild : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisPtrTy<1>,
SDTCisVT<2, OtherVT>]>;
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32"
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
-// 64-bit or 80-bit floating point values. These sizes apply to the values,
+// 64-bit or 80-bit floating point values. These sizes apply to the values,
// not the registers, which are always 80 bits; RFP32, RFP64 and RFP80 can be
// copied to each other without losing information. These instructions are all
// pseudo instructions and use the "_Fp" suffix.
// The second instruction is defined with FPI, which is the actual instruction
// emitted by the assembler. These use "RST" registers, although frequently
// the actual register(s) used are implicit. These are always 80 bits.
-// The FP stackifier pass converts one to the other after register allocation
+// The FP stackifier pass converts one to the other after register allocation
// occurs.
//
// Note that the FpI instruction should have instruction selection info (e.g.
// These instructions cannot address 80-bit memory.
multiclass FPBinary<SDNode OpNode, Format fp, string asmstring> {
// ST(0) = ST(0) + [mem]
-def _Fp32m : FpIf32<(outs RFP32:$dst),
+def _Fp32m : FpIf32<(outs RFP32:$dst),
(ins RFP32:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP32:$dst,
+ [(set RFP32:$dst,
(OpNode RFP32:$src1, (loadf32 addr:$src2)))]>;
-def _Fp64m : FpIf64<(outs RFP64:$dst),
+def _Fp64m : FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
+ [(set RFP64:$dst,
(OpNode RFP64:$src1, (loadf64 addr:$src2)))]>;
-def _Fp64m32: FpIf64<(outs RFP64:$dst),
+def _Fp64m32: FpIf64<(outs RFP64:$dst),
(ins RFP64:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP64:$dst,
+ [(set RFP64:$dst,
(OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>;
-def _Fp80m32: FpI_<(outs RFP80:$dst),
+def _Fp80m32: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f32mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
+ [(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>;
-def _Fp80m64: FpI_<(outs RFP80:$dst),
+def _Fp80m64: FpI_<(outs RFP80:$dst),
(ins RFP80:$src1, f64mem:$src2), OneArgFPRW,
- [(set RFP80:$dst,
+ [(set RFP80:$dst,
(OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>;
-def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
- !strconcat("f", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
+def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src),
+ !strconcat("f", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
}
-def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
- !strconcat("f", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
+def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src),
+ !strconcat("f", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
}
// ST(0) = ST(0) + [memint]
-def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
+def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
+def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP32:$dst, (OpNode RFP32:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
+def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
+def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP64:$dst, (OpNode RFP64:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
+def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2),
OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i16)))]>;
-def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
+def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2),
OneArgFPRW,
[(set RFP80:$dst, (OpNode RFP80:$src1,
(X86fild addr:$src2, i32)))]>;
-def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
- !strconcat("fi", asmstring, "{s}\t$src")> {
- let mayLoad = 1;
+def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src),
+ !strconcat("fi", asmstring, "{s}\t$src")> {
+ let mayLoad = 1;
}
-def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
- !strconcat("fi", asmstring, "{l}\t$src")> {
- let mayLoad = 1;
+def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src),
+ !strconcat("fi", asmstring, "{l}\t$src")> {
+ let mayLoad = 1;
}
}
IIC_FST>;
def ISTT_FP32m : FPI<0xDB, MRM1m, (outs), (ins i32mem:$dst), "fisttp{l}\t$dst",
IIC_FST>;
-def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
+def ISTT_FP64m : FPI<0xDD, MRM1m, (outs), (ins i64mem:$dst),
"fisttp{ll}\t$dst", IIC_FST>;
}
def FXSAVE : I<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
"fxsave\t$dst", [], IIC_FXSAVE>, TB;
def FXSAVE64 : RI<0xAE, MRM0m, (outs opaque512mem:$dst), (ins),
- "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
+ "fxsave{q|64}\t$dst", [], IIC_FXSAVE>, TB,
Requires<[In64BitMode]>;
def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src),
"fxrstor\t$src", [], IIC_FXRSTOR>, TB;
// Required for CALL which return f32 / f64 / f80 values.
def : Pat<(X86fst RFP32:$src, addr:$op, f32), (ST_Fp32m addr:$op, RFP32:$src)>;
-def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
+def : Pat<(X86fst RFP64:$src, addr:$op, f32), (ST_Fp64m32 addr:$op,
RFP64:$src)>;
def : Pat<(X86fst RFP64:$src, addr:$op, f64), (ST_Fp64m addr:$op, RFP64:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
+def : Pat<(X86fst RFP80:$src, addr:$op, f32), (ST_Fp80m32 addr:$op,
RFP80:$src)>;
-def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
+def : Pat<(X86fst RFP80:$src, addr:$op, f64), (ST_Fp80m64 addr:$op,
RFP80:$src)>;
def : Pat<(X86fst RFP80:$src, addr:$op, f80), (ST_FpP80m addr:$op,
RFP80:$src)>;
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary,
Domain d = GenericDomain>
: X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
// Iseg16 - 16-bit segment selector, 16-bit offset
// Iseg32 - 16-bit segment selector, 32-bit offset
-class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
-class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
+class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
}
// SSE1 Instruction Templates:
-//
+//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with PS prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and PS prefix.
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
-//
+//
// SDI - SSE2 instructions with XD prefix.
// SDIi8 - SSE2 instructions with ImmT == Imm8 and XD prefix.
// S2SI - SSE2 instructions with XS prefix.
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
-//
+//
// S3I - SSE3 instructions with PD prefixes.
// S3SI - SSE3 instructions with XS prefix.
// S3DI - SSE3 instructions with XD prefix.
-class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[UseSSE3]>;
-class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[UseSSE3]>;
// SSSE3 Instruction Templates:
-//
+//
// SS38I - SSSE3 instructions with T8 prefix.
// SS3AI - SSSE3 instructions with TA prefix.
// MMXSS38I - SSSE3 instructions with T8 prefix and MMX operands.
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
-//
+//
// SS48I - SSE 4.1 instructions with T8 prefix.
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
Requires<[UseSSE41]>;
// SSE4.2 Instruction Templates:
-//
+//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
// MMXIi8 - MMX instructions with ImmT == Imm8 and PS prefix.
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
-class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
-class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI32<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,Not64BitMode]>;
-class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX,In64BitMode]>;
-class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PS, REX_W, Requires<[HasMMX]>;
-class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: I<o, F, outs, ins, asm, pattern, itin>, PD, Requires<[HasMMX]>;
-class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, PS, Requires<[HasMMX]>;
-class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
-class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
+class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
- case X86::KSET0B:
+ case X86::KSET0B:
case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr));
case X86::KSET1B:
case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr));
def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void,
- [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
let Predicates = [HasLZCNT] in {
def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
- (X86cmp GR16:$src, (i16 0))),
+ (X86cmp GR16:$src, (i16 0))),
(LZCNT16rr GR16:$src)>;
def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
(X86cmp GR32:$src, (i32 0))),
(LZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(LZCNT16rm addr:$src)>;
def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(LZCNT32rm addr:$src)>;
def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(LZCNT64rm addr:$src)>;
}
(TZCNT64rr GR64:$src)>;
def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E),
- (X86cmp (loadi16 addr:$src), (i16 0))),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
(TZCNT16rm addr:$src)>;
def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E),
- (X86cmp (loadi32 addr:$src), (i32 0))),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
(TZCNT32rm addr:$src)>;
def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E),
- (X86cmp (loadi64 addr:$src), (i64 0))),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
(TZCNT64rm addr:$src)>;
}
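As a side note, here is a minimal C++ sketch (an assumed illustration, not code
from this patch) of the source idiom the LZCNT/TZCNT patterns above are meant
to collapse: __builtin_clz and __builtin_ctz are undefined for a zero input, so
portable code guards them with a select that returns the bit width, and since
lzcnt/tzcnt already return the width for zero, the whole select can fold into a
single instruction.

// Assumed illustration only: the guarded builtins below lower to the
// ctlz/cttz + cmov shape that the patterns above rewrite into a bare
// LZCNT32rr / TZCNT32rr when the target has LZCNT / BMI.
unsigned clz32(unsigned x) { return x == 0 ? 32u : unsigned(__builtin_clz(x)); }
unsigned ctz32(unsigned x) { return x == 0 ? 32u : unsigned(__builtin_ctz(x)); }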
multiclass ssse3_palign_mm<string asm, Intrinsic IntId> {
def R64irr : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
Sched<[WriteShuffle]>;
def R64irm : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
// Data Transfer Instructions
def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
+ [(set VR64:$dst,
(x86mmx (scalar_to_vector GR32:$src)))],
IIC_MMX_MOV_MM_RM>, Sched<[WriteMove]>;
let canFoldAsLoad = 1 in
let SchedRW = [WriteMove] in {
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
+ "movd\t{$src, $dst|$dst, $src}",
[(set GR64:$dst,
(bitconvert VR64:$src))], IIC_MMX_MOV_REG_MM>;
let hasSideEffects = 0 in
MMX_INTALU_ITINS>;
// -- Unpack Instructions
-defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
+defm MMX_PUNPCKHBW : MMXI_binop_rm_int<0x68, "punpckhbw",
int_x86_mmx_punpckhbw,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
+defm MMX_PUNPCKHWD : MMXI_binop_rm_int<0x69, "punpckhwd",
int_x86_mmx_punpckhwd,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
+defm MMX_PUNPCKHDQ : MMXI_binop_rm_int<0x6A, "punpckhdq",
int_x86_mmx_punpckhdq,
MMX_UNPCK_H_ITINS>;
-defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
+defm MMX_PUNPCKLBW : MMXI_binop_rm_int<0x60, "punpcklbw",
int_x86_mmx_punpcklbw,
MMX_UNPCK_L_ITINS>;
-defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
+defm MMX_PUNPCKLWD : MMXI_binop_rm_int<0x61, "punpcklwd",
int_x86_mmx_punpcklwd,
MMX_UNPCK_L_ITINS>;
defm MMX_PUNPCKLDQ : MMXI_binop_rm_int<0x62, "punpckldq",
IIC_MMX_PEXTR>, Sched<[WriteShuffle]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg,
- (outs VR64:$dst),
+ (outs VR64:$dst),
(ins VR64:$src1, GR32orGR64:$src2, i32i8imm:$src3),
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR64:$dst, (int_x86_mmx_pinsr_w VR64:$src1,
// previously we generated:
// addss %xmm0, %xmm1
// movss %xmm1, %xmm0
-//
+//
// we now generate:
// addss %xmm1, %xmm0
// previously we generated:
// addps %xmm0, %xmm1
// movss %xmm1, %xmm0
-//
+//
// we now generate:
// addss %xmm1, %xmm0
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(MULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
(DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
}
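For context, a small assumed C++ example of code that produces the
X86Movss-of-fadd shape matched above; Clang's <xmmintrin.h> implements
_mm_add_ss as a scalar add into the low lane, though that exact lowering is an
assumption here rather than something this patch asserts.

#include <xmmintrin.h>

// Assumed illustration: a scalar add folded into the low lane of a vector.
// With the patterns above this should select a single addss, without a
// trailing movss copying the result back into the destination register.
__m128 add_low_lane(__m128 a, __m128 b) {
  return _mm_add_ss(a, b);
}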
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(ADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(SUBSSrr_Int v4f32:$dst, v4f32:$src)>;
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(MULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(DIVSSrr_Int v4f32:$dst, v4f32:$src)>;
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fadd (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(VADDSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fsub (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(VSUBSSrr_Int v4f32:$dst, v4f32:$src)>;
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fmul (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(VMULSSrr_Int v4f32:$dst, v4f32:$src)>;
- def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
(fdiv (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
(VDIVSSrr_Int v4f32:$dst, v4f32:$src)>;
let SchedRW = [WriteNop] in {
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins),
- "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
+def PAUSE : I<0x90, RawFrm, (outs), (ins),
+ "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
OBXS, Requires<[HasSSE2]>;
}
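As a usage note (an assumed example, not part of this patch): the typical
consumer of PAUSE is a spin-wait loop, and because the encoding is just
"rep; nop" (F3 90), the same bytes execute as a plain nop on pre-SSE2 parts.

#include <atomic>
#include <immintrin.h>

// Assumed illustration: _mm_pause() lowers to the PAUSE definition above
// (via int_x86_sse2_pause) and merely hints the core while it spins.
void spin_until_set(const std::atomic<bool> &flag) {
  while (!flag.load(std::memory_order_acquire))
    _mm_pause();
}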
def : Pat<(v4i64 (X86vsext (v4i32 (bitconvert (v2i64 (load addr:$src)))))),
(VPMOVSXDQYrm addr:$src)>;
- def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2i64
(scalar_to_vector (loadi64 addr:$src))))))),
(VPMOVSXBDYrm addr:$src)>;
- def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
+ def : Pat<(v8i32 (X86vsext (v16i8 (bitconvert (v2f64
(scalar_to_vector (loadf64 addr:$src))))))),
(VPMOVSXBDYrm addr:$src)>;
- def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2i64
(scalar_to_vector (loadi64 addr:$src))))))),
(VPMOVSXWQYrm addr:$src)>;
- def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
+ def : Pat<(v4i64 (X86vsext (v8i16 (bitconvert (v2f64
(scalar_to_vector (loadf64 addr:$src))))))),
(VPMOVSXWQYrm addr:$src)>;
- def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
+ def : Pat<(v4i64 (X86vsext (v16i8 (bitconvert (v4i32
(scalar_to_vector (loadi32 addr:$src))))))),
(VPMOVSXBQYrm addr:$src)>;
}
"sar{w}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize16;
-def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
+def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize32;
-def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
+def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t{%cl, $dst|$dst, cl}",
[(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
+
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t$dst", [], IIC_SR>, OpSize16;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, cl}", [], IIC_SR>;
-
+
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t$dst", [], IIC_SR>, OpSize16;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{%cl, $dst|$dst, cl}", [], IIC_SR>, OpSize32;
-
+
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
"rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
"rol{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
+def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
"ror{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
IIC_SR>, OpSize32;
-def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
+def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
"ror{w}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize16;
-def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
+def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
IIC_SR>, OpSize32;
-def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
+def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t{%cl, $dst|$dst, cl}",
[(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
IIC_SR>;
let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in {
let Uses = [CL] in {
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
+def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
IIC_SHD16_REG_CL>,
TB, OpSize16;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
+def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
IIC_SHD16_REG_CL>,
TB, OpSize16;
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
+def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
IIC_SHD32_REG_CL>, TB, OpSize32;
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
+def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
- IIC_SHD64_REG_CL>,
+ IIC_SHD64_REG_CL>,
TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
+def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
- IIC_SHD64_REG_CL>,
+ IIC_SHD64_REG_CL>,
TB;
}
let isCommutable = 1 in { // These instructions commute to each other.
def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR16:$dst),
+ (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR16:$dst),
+ (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
(i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize16;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
- (outs GR32:$dst),
+ (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
- (outs GR32:$dst),
+ (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
(i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB, OpSize32;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
- (outs GR64:$dst),
+ (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
(i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
- (outs GR64:$dst),
+ (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)], IIC_SHD32_MEM_CL>, TB, OpSize32;
-
+
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
(i8 imm:$src3)), addr:$dst)],
IIC_SHD16_MEM_IM>,
TB, OpSize16;
-def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
+def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD32_MEM_IM>,
TB, OpSize32;
-def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
+def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
(i8 imm:$src3)), addr:$dst)],
IIC_SHD64_MEM_IM>,
TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
+def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
let SchedRW = [WriteSystem] in {
def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", [], IIC_SWAPGS>, TB;
-def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize16;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
OpSize16;
// The i16mem operand in LAR32rm and the GR32 operand in LAR32rr are not typos.
-def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
+def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB,
OpSize32;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"lar{l}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB,
OpSize32;
// The i16mem operand in LAR64rm and the GR32 operand in LAR64rr are not typos.
-def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RM>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"lar{q}\t{$src, $dst|$dst, $src}", [], IIC_LAR_RR>, TB;
"lsl{l}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB,
OpSize32;
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
+ "lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RM>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"lsl{q}\t{$src, $dst|$dst, $src}", [], IIC_LSL_RR>, TB;
"ltr{w}\t$src", [], IIC_LTR>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
"ltr{w}\t$src", [], IIC_LTR>, TB;
-
+
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
"push{w}\t{%cs|cs}", [], IIC_PUSH_SR>,
OpSize16, Requires<[Not64BitMode]>;
"lds{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
def LDS32rm : I<0xc5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lds{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
-
+
def LSS16rm : I<0xb2, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lss{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LSS32rm : I<0xb2, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lss{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
def LSS64rm : RI<0xb2, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lss{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
-
+
def LES16rm : I<0xc4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"les{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize16;
def LES32rm : I<0xc4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"les{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, OpSize32;
-
+
def LFS16rm : I<0xb4, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lfs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LFS32rm : I<0xb4, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lfs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
def LFS64rm : RI<0xb4, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lfs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
-
+
def LGS16rm : I<0xb5, MRMSrcMem, (outs GR16:$dst), (ins opaque32mem:$src),
"lgs{w}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize16;
def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaque48mem:$src),
"lgs{l}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB, OpSize32;
-
+
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaque80mem:$src),
"lgs{q}\t{$src, $dst|$dst, $src}", [], IIC_LXS>, TB;
"sldt{w}\t$dst", [], IIC_SLDT>, TB;
def SLDT32r : I<0x00, MRM0r, (outs GR32:$dst), (ins),
"sldt{l}\t$dst", [], IIC_SLDT>, OpSize32, TB;
-
+
// LLDT is not interpreted specially in 64-bit mode because there is no sign
// extension.
def SLDT64r : RI<0x00, MRM0r, (outs GR64:$dst), (ins),
def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>,
TB;
-def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
+def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins),
"smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB;
-def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
+def SMSW32r : I<0x01, MRM4r, (outs GR32:$dst), (ins),
"smsw{l}\t$dst", [], IIC_SMSW>, OpSize32, TB;
// no m form encodable; use SMSW16m
-def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
+def SMSW64r : RI<0x01, MRM4r, (outs GR64:$dst), (ins),
"smsw{q}\t$dst", [], IIC_SMSW>, TB;
// For memory operands, there is only a 16-bit form
INTR_NO_TYPE,
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP,
- CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
+ CMP_MASK, CMP_MASK_CC, VSHIFT, VSHIFT_MASK, COMI,
INTR_TYPE_1OP_MASK_RM, INTR_TYPE_2OP_MASK, INTR_TYPE_SCALAR_MASK_RM
};
X86_INTRINSIC_DATA(addcarry_u64, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u32, ADX, X86ISD::ADC, 0),
X86_INTRINSIC_DATA(addcarryx_u64, ADX, X86ISD::ADC, 0),
-
+
X86_INTRINSIC_DATA(avx512_gather_dpd_512, GATHER, X86::VGATHERDPDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpi_512, GATHER, X86::VPGATHERDDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_dpq_512, GATHER, X86::VPGATHERDQZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_qpi_512, GATHER, X86::VPGATHERQDZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_qpq_512, GATHER, X86::VPGATHERQQZrm, 0),
X86_INTRINSIC_DATA(avx512_gather_qps_512, GATHER, X86::VGATHERQPSZrm, 0),
-
+
X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
-
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qpi_512, SCATTER, X86::VPSCATTERQDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qpq_512, SCATTER, X86::VPSCATTERQQZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_qps_512, SCATTER, X86::VSCATTERQPSZmr, 0),
-
+
X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH,
X86::VSCATTERPF0DPDm, X86::VSCATTERPF1DPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH,
X86::VSCATTERPF0QPDm, X86::VSCATTERPF1QPDm),
X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH,
X86::VSCATTERPF0QPSm, X86::VSCATTERPF1QPSm),
-
+
X86_INTRINSIC_DATA(rdpmc, RDPMC, X86ISD::RDPMC_DAG, 0),
X86_INTRINSIC_DATA(rdrand_16, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdrand_32, RDRAND, X86ISD::RDRAND, 0),
X86_INTRINSIC_DATA(rdseed_64, RDSEED, X86ISD::RDSEED, 0),
X86_INTRINSIC_DATA(rdtsc, RDTSC, X86ISD::RDTSC_DAG, 0),
X86_INTRINSIC_DATA(rdtscp, RDTSC, X86ISD::RDTSCP_DAG, 0),
-
+
X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
Inst.addOperand(Seg);
}
-static unsigned getRetOpcode(const X86Subtarget &Subtarget)
-{
- return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
+static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
+ return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
}
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// Record our statepoint node in the same section used by STACKMAP
// and PATCHPOINT
- SM.recordStatepoint(MI);
+ SM.recordStatepoint(MI);
}
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), TM,
getSubtargetInfo(), MCInstLowering);
-
+
case TargetOpcode::STACKMAP:
return LowerSTACKMAP(*MI);
const X86RegisterInfo *RegInfo = static_cast<const X86RegisterInfo *>(
MF->getSubtarget().getRegisterInfo());
unsigned SlotSize = RegInfo->getSlotSize();
- for (const MCPhysReg *CSR =
+ for (const MCPhysReg *CSR =
RegInfo->X86RegisterInfo::getCalleeSavedRegs(MF);
unsigned Reg = *CSR;
++CSR)
/// contains stack pointer re-alignment code which requires FP.
bool ForceFramePointer;
- /// RestoreBasePointerOffset - Non-zero if the function has base pointer
- /// and makes call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
- /// displacement from the frame pointer to a slot where the base pointer
- /// is stashed.
+ /// RestoreBasePointerOffset - Non-zero if the function has a base pointer
+ /// and makes a call to llvm.eh.sjlj.setjmp. When non-zero, the value is a
+ /// displacement from the frame pointer to the slot where the base pointer
+ /// is stashed.
signed char RestoreBasePointerOffset;
-
+
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
/// True if unaligned 32-byte memory accesses are slow.
bool IsUAMem32Slow;
-
+
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
/// For this to be profitable, the cost of FDIV must be
/// substantially higher than normal FP ops like FADD and FMUL.
bool UseReciprocalEst;
-
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
{ ISD::SHL, MVT::v8i16, 8*10 }, // Scalarized.
{ ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
{ ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
- { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
+ { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
{ ISD::SRL, MVT::v16i8, 16*10 }, // Scalarized.
{ ISD::SRL, MVT::v8i16, 8*10 }, // Scalarized.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
};
-
+
if (ST->hasSSSE3()) {
int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
-
+
// This is expanded into a long sequence of four extract + four insert.
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
};
- // Fall-back (SSE3 and SSE2).
+ // Fall-back (SSE3 and SSE2).
int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
if (Idx != -1)
return LT.first * SSEAltShuffleTbl[Idx].Cost;
unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
bool IsPairwise) const {
-
+
std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);
-
+
MVT MTy = LT.second;
-
+
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
-
- // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
- // and make it as the cost.
-
+
+ // We use the Intel Architecture Code Analyzer (IACA) to measure the
+ // throughput and use that measurement as the cost.
+
static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
{ ISD::ADD, MVT::v8i16, 5 },
};
-
+
static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::FADD, MVT::v4f64, 5 },
{ ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
{ ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
};
-
+
static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v4f64, 3 },
{ ISD::ADD, MVT::v8i16, 4 },
{ ISD::ADD, MVT::v8i32, 5 },
};
-
+
if (IsPairwise) {
if (ST->hasAVX()) {
int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
if (Idx != -1)
return LT.first * AVX1CostTblPairWise[Idx].Cost;
}
-
+
if (ST->hasSSE42()) {
int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
if (Idx != -1)
if (Idx != -1)
return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
}
-
+
if (ST->hasSSE42()) {
int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
if (Idx != -1)