const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
getTargetMachine().getSubtargetImpl()->getInstrInfo());
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
TII->legalizeOperands(MI);
if (TII->isMIMG(MI->getOpcode())) {
unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
MI->setDesc(TII->get(NewOpcode));
- MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
MRI.setRegClass(VReg, RC);
return;
}
let LGKM_CNT = 1;
let UseNamedOperandTable = 1;
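+ // m0 is read by the hardware but is not an encoded field of DS instructions,
+ // so keep the new explicit $m0 operand out of the emitted bits.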
+ let DisableEncoding = "$m0";
}
class MUBUF <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr"#"$offset"#" [M0]",
[]> {
let data0 = 0;
class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1),
+ (ins i1imm:$gds, VReg_32:$addr, ds_offset0:$offset0, ds_offset1:$offset1,
+ M0Reg:$m0),
asm#" $vdst, $addr"#"$offset0"#"$offset1 [M0]",
[]> {
let data0 = 0;
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]> {
let data1 = 0;
op,
(outs),
(ins i1imm:$gds, VReg_32:$addr, regClass:$data0, regClass:$data1,
- ds_offset0:$offset0, ds_offset1:$offset1),
+ ds_offset0:$offset0, ds_offset1:$offset1, M0Reg:$m0),
asm#" $addr, $data0, $data1"#"$offset0"#"$offset1 [M0]",
[]> {
let mayStore = 1;
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr, $data0"#"$offset"#" [M0]", []>,
AtomicNoRet<noRetOp, 1> {
class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc, string noRetOp = ""> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $vdst, $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 1> {
class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0, $data1"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {
class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp = asm> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, rc:$data0, ds_offset:$offset, M0Reg:$m0),
asm#" $addr, $data0"#"$offset"#" [M0]",
[]>,
AtomicNoRet<noRetOp, 0> {
class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
(vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst (i1 0), $ptr, (as_i16imm $offset))
+ (inst (i1 0), $ptr, (as_i16imm $offset), (S_MOV_B32 -1))
>;
def : DSReadPat <DS_READ_I8, i32, sextloadi8_local>;
def : Pat <
(v2i32 (local_load (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1))),
- (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1)
+ (DS_READ2_B32 (i1 0), $ptr, $offset0, $offset1, (S_MOV_B32 -1))
>;
class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
>;
def : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>;
(local_store v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
i8:$offset1)),
(DS_WRITE2_B32 (i1 0), $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1)
+ (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
+ (S_MOV_B32 -1))
>;
class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst (i1 0), $ptr, $value, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $value, (as_i16imm $offset), (S_MOV_B32 -1))
>;
// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec
class DSAtomicIncRetPat<DS inst, ValueType vt,
Instruction LoadImm, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)),
- (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset))
+ (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset), (S_MOV_B32 -1))
>;
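// Illustrative use (the instruction and PatFrag names below are assumptions,
// not taken from this patch):
//   def : DSAtomicIncRetPat<DS_INC_RTN_U32, i32, V_MOV_B32_e32,
//                           atomic_load_add_local>;
// An LDS atomic "add 1" then selects to the inc instruction, with
// (LoadImm -1) supplying the wrap-around bound in data0 and (S_MOV_B32 -1)
// now feeding the explicit m0 operand.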
class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset))
+ (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset), (S_MOV_B32 -1))
>;
// Be careful, since the addresses could be subregisters themselves in weird
// cases, like vectors of pointers.
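// E.g. a load through a <2 x i32 addrspace(3)*> vector keeps both pointers in
// a single 64-bit vreg, so an address operand may be that vreg's sub0 or sub1
// subregister rather than a plain 32-bit vreg.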
const MachineOperand *AddrReg = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
unsigned DestReg0 = TII->getNamedOperand(*I, AMDGPU::OpName::vdst)->getReg();
unsigned DestReg1
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
LIS->shrinkToUses(&AddrRegLI);
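+ // Likewise shrink the live range of the m0 operand's register: its uses in
+ // the two original loads are replaced by the single merged read2.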
+ LiveInterval &M0RegLI = LIS->getInterval(M0Reg->getReg());
+ LIS->shrinkToUses(&M0RegLI);
+
LIS->getInterval(DestReg); // Create new LI
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
// Be sure to use .addOperand(), and not .addReg(), with these: .addOperand()
// preserves the subregister index and any register flags set on the operands.
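// For example, if Addr carries a subreg index or a kill flag, .addOperand(*Addr)
// copies both, whereas .addReg(Addr->getReg()) would rebuild the operand and
// drop them.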
const MachineOperand *Addr = TII->getNamedOperand(*I, AMDGPU::OpName::addr);
+ const MachineOperand *M0Reg = TII->getNamedOperand(*I, AMDGPU::OpName::m0);
const MachineOperand *Data0 = TII->getNamedOperand(*I, AMDGPU::OpName::data0);
const MachineOperand *Data1
= TII->getNamedOperand(*Paired, AMDGPU::OpName::data0);
.addOperand(*Data1) // data1
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
+ .addOperand(*M0Reg) // m0
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
// XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
+ unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg(),
+ M0Reg->getReg() };
LIS->RemoveMachineInstrFromMaps(I);
LIS->RemoveMachineInstrFromMaps(Paired);
void Kill(MachineInstr &MI);
void Branch(MachineInstr &MI);
- void InitM0ForLDS(MachineBasicBlock::iterator MI);
void LoadM0(MachineInstr &MI, MachineInstr *MovRel);
void IndirectSrc(MachineInstr &MI);
void IndirectDst(MachineInstr &MI);
MI.eraseFromParent();
}
-/// The m0 register stores the maximum allowable address for LDS reads and
-/// writes. Its value must be at least the size in bytes of LDS allocated by
-/// the shader. For simplicity, we set it to the maximum possible value.
-void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) {
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32),
- AMDGPU::M0).addImm(0xffffffff);
-}
-
void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) {
MachineBasicBlock &MBB = *MI.getParent();
.addReg(Save);
}
- // FIXME: Are there any values other than the LDS address clamp that need to
- // be stored in the m0 register and may be live for more than a few
- // instructions? If so, we should save the m0 register at the beginning
- // of this function and restore it here.
- // FIXME: Add support for LDS direct loads.
- InitM0ForLDS(&MI);
MI.eraseFromParent();
}
MachineInstr &MI = *I;
if (TII->isDS(MI.getOpcode())) {
- NeedM0 = true;
NeedWQM = true;
}
}
}
- if (NeedM0) {
- MachineBasicBlock &MBB = MF.front();
- // Initialize M0 to a value that won't cause LDS access to be discarded
- // due to offset clamping
- InitM0ForLDS(MBB.getFirstNonPHI());
- }
-
if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
MachineBasicBlock &MBB = MF.front();
BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
; pointer can be used with an offset into the second one.
; SI-LABEL: {{^}}load_shl_base_lds_2:
-; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: s_mov_b32 m0, -1
+; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
; SI: s_endpgm
define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {