bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT,
const DebugLoc &DL);
- bool X86FastEmitLoad(EVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
+ bool X86FastEmitLoad(MVT VT, X86AddressMode &AM, MachineMemOperand *MMO,
unsigned &ResultReg, unsigned Alignment = 1);
bool X86FastEmitStore(EVT VT, const Value *Val, X86AddressMode &AM,
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address, passed in AM, is either pre-computed, i.e. Ptr, or a
/// GlobalAddress, i.e. GV.
/// Returns true, and the result register by reference in ResultReg, if it is
/// possible to emit the load.
-bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
+bool X86FastISel::X86FastEmitLoad(MVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
bool HasSSE41 = Subtarget->hasSSE41();
bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
+ // Treat i1 loads the same as i8 loads. Masking will be done when storing.
+ if (VT == MVT::i1)
+ VT = MVT::i8;
+
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
- const TargetRegisterClass *RC = nullptr;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.SimpleTy) {
default: return false;
- case MVT::i1:
case MVT::i8:
Opc = X86::MOV8rm;
- RC = &X86::GR8RegClass;
break;
case MVT::i16:
Opc = X86::MOV16rm;
- RC = &X86::GR16RegClass;
break;
case MVT::i32:
Opc = X86::MOV32rm;
- RC = &X86::GR32RegClass;
break;
case MVT::i64:
// Must be in x86-64 mode.
Opc = X86::MOV64rm;
- RC = &X86::GR64RegClass;
break;
case MVT::f32:
- if (X86ScalarSSEf32) {
+ if (X86ScalarSSEf32)
Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
- } else {
+ else
Opc = X86::LD_Fp32m;
- RC = &X86::RFP32RegClass;
- }
break;
case MVT::f64:
- if (X86ScalarSSEf64) {
+ if (X86ScalarSSEf64)
Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
- } else {
+ else
Opc = X86::LD_Fp64m;
- RC = &X86::RFP64RegClass;
- }
break;
case MVT::f80:
// No f80 support yet.
else
Opc = HasVLX ? X86::VMOVUPSZ128rm :
HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v2f64:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
else
Opc = HasVLX ? X86::VMOVUPDZ128rm :
HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v4i32:
case MVT::v2i64:
else
Opc = HasVLX ? X86::VMOVDQU64Z128rm :
HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
- RC = HasVLX ? &X86::VR128XRegClass : &X86::VR128RegClass;
break;
case MVT::v8f32:
assert(HasAVX);
Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
else
Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v4f64:
assert(HasAVX);
Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
else
Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v8i32:
case MVT::v4i64:
Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
else
Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
- RC = HasVLX ? &X86::VR256XRegClass : &X86::VR256RegClass;
break;
case MVT::v16f32:
assert(HasAVX512);
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVAPSZrm : X86::VMOVUPSZrm;
- RC = &X86::VR512RegClass;
break;
case MVT::v8f64:
assert(HasAVX512);
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVAPDZrm : X86::VMOVUPDZrm;
- RC = &X86::VR512RegClass;
break;
case MVT::v8i64:
case MVT::v16i32:
Opc = X86::VMOVNTDQAZrm;
else
Opc = (Alignment >= 64) ? X86::VMOVDQA64Zrm : X86::VMOVDQU64Zrm;
- RC = &X86::VR512RegClass;
break;
}
+ const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+
ResultReg = createResultReg(RC);
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
unsigned Opc = 0;
bool HasAVX = Subtarget->hasAVX();
bool HasAVX512 = Subtarget->hasAVX512();
- const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
- if (X86ScalarSSEf32) {
+ if (X86ScalarSSEf32)
Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
- RC = HasAVX512 ? &X86::FR32XRegClass : &X86::FR32RegClass;
- } else {
+ else
Opc = X86::LD_Fp32m;
- RC = &X86::RFP32RegClass;
- }
break;
case MVT::f64:
- if (X86ScalarSSEf64) {
+ if (X86ScalarSSEf64)
Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
- RC = HasAVX512 ? &X86::FR64XRegClass : &X86::FR64RegClass;
- } else {
+ else
Opc = X86::LD_Fp64m;
- RC = &X86::RFP64RegClass;
- }
break;
case MVT::f80:
// No f80 support yet.
// Create the load from the constant pool.
unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
- unsigned ResultReg = createResultReg(RC);
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
if (CM == CodeModel::Large) {
unsigned AddrReg = createResultReg(&X86::GR64RegClass);