// FIXME: This isn't safe because the addressing mode doesn't work
// correctly if vaddr is negative.
//
- // FIXME: Handle v_add_u32 and VOP3 form. Also don't rely on immediate
- // being in src0.
- //
// FIXME: Should probably be done somewhere else, maybe SIFoldOperands.
//
// See if we can extract an immediate offset by recognizing one of these:
// V_ADD_I32_e32 dst, imm, src1
// V_ADD_I32_e32 dst, (S_MOV_B32 imm), src1
// V_ADD will be removed by "Remove dead machine instructions".
- if (Add && Add->getOpcode() == AMDGPU::V_ADD_I32_e32) {
- const MachineOperand *Src =
- getNamedOperand(*Add, AMDGPU::OpName::src0);
-
- if (Src->isReg()) {
- auto Mov = MRI.getUniqueVRegDef(Src->getReg());
- if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
- Src = &Mov->getOperand(1);
- }
+ if (Add &&
+ (Add->getOpcode() == AMDGPU::V_ADD_I32_e32 ||
+ Add->getOpcode() == AMDGPU::V_ADD_U32_e64)) {
+ static const unsigned SrcNames[2] = {
+ AMDGPU::OpName::src0,
+ AMDGPU::OpName::src1,
+ };
+
+ // Find a literal offset in one of the source operands.
+ for (int i = 0; i < 2; i++) {
+ const MachineOperand *Src =
+ getNamedOperand(*Add, SrcNames[i]);
+
+ if (Src->isReg()) {
+ auto Mov = MRI.getUniqueVRegDef(Src->getReg());
+ if (Mov && Mov->getOpcode() == AMDGPU::S_MOV_B32)
+ Src = &Mov->getOperand(1);
+ }
+
+ if (Src) {
+ if (Src->isImm())
+ Offset = Src->getImm();
+ else if (Src->isCImm())
+ Offset = Src->getCImm()->getZExtValue();
+ }
+
+ if (Offset && isLegalMUBUFImmOffset(Offset)) {
+ VAddr = getNamedOperand(*Add, SrcNames[!i]);
+ break;
+ }
- if (Src) {
- if (Src->isImm())
- Offset = Src->getImm();
- else if (Src->isCImm())
- Offset = Src->getCImm()->getZExtValue();
- }
-
- if (Offset && isLegalMUBUFImmOffset(Offset))
- VAddr = getNamedOperand(*Add, AMDGPU::OpName::src1);
- else
Offset = 0;
+ }
}
BuildMI(*MBB, Inst, Inst.getDebugLoc(),
; GCN-LABEL: {{^}}smrd_vgpr_offset_imm:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
-
-; GFX9-NEXT: v_add_u32_e32 [[ADD:v[0-9]+]], 0xfff, v0
-; GFX9-NEXT: buffer_load_dword v{{[0-9]}}, [[ADD]], s[0:3], 0 offen ;
+; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen offset:4095 ;
define amdgpu_ps float @smrd_vgpr_offset_imm(<4 x i32> inreg %desc, i32 %offset) #0 {
main_body:
%off = add i32 %offset, 4095
; GCN-LABEL: {{^}}smrd_vgpr_merged:
; GCN-NEXT: %bb.
-
-; SICIVI-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
-; SICIVI-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
-
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
-; GFX9: buffer_load_dword
+; GCN-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:4
+; GCN-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], v0, s[0:3], 0 offen offset:28
define amdgpu_ps void @smrd_vgpr_merged(<4 x i32> inreg %desc, i32 %a) #0 {
main_body:
%a1 = add i32 %a, 4