MachineOperand
AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
+ const TargetRegisterClass &SubRC,
unsigned SubIdx) const {
MachineInstr *MI = MO.getParent();
MachineBasicBlock *BB = MO.getParent()->getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned DstReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register DstReg = MRI.createVirtualRegister(&SubRC);
if (MO.isReg()) {
unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.getSubReg(), SubIdx);
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned Size = RBI.getSizeInBits(I.getOperand(0).getReg(), MRI, TRI);
- unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
-
- if (Size != 64)
- return false;
-
- DebugLoc DL = I.getDebugLoc();
+ Register DstReg = I.getOperand(0).getReg();
+ const DebugLoc &DL = I.getDebugLoc();
+ unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
+ const RegisterBank *DstRB = RBI.getRegBank(DstReg, MRI, TRI);
+ const bool IsSALU = DstRB->getID() == AMDGPU::SGPRRegBankID;
- MachineOperand Lo1(getSubOperand64(I.getOperand(1), AMDGPU::sub0));
- MachineOperand Lo2(getSubOperand64(I.getOperand(2), AMDGPU::sub0));
+ if (Size == 32) {
+ if (IsSALU) {
+ MachineInstr *Add =
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstReg)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2));
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+ }
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
- .add(Lo1)
- .add(Lo2);
+ if (STI.hasAddNoCarry()) {
+ I.setDesc(TII.get(AMDGPU::V_ADD_U32_e64));
+ I.addOperand(*MF, MachineOperand::CreateImm(0));
+ I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
- MachineOperand Hi1(getSubOperand64(I.getOperand(1), AMDGPU::sub1));
- MachineOperand Hi2(getSubOperand64(I.getOperand(2), AMDGPU::sub1));
+ Register UnusedCarry = MRI.createVirtualRegister(TRI.getWaveMaskRegClass());
+ MachineInstr *Add
+ = BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstReg)
+ .addDef(UnusedCarry, RegState::Dead)
+ .add(I.getOperand(1))
+ .add(I.getOperand(2))
+ .addImm(0);
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*Add, TII, TRI, RBI);
+ }
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
- .add(Hi1)
- .add(Hi2);
+ const TargetRegisterClass &RC
+ = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
+ const TargetRegisterClass &HalfRC
+ = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
+
+ MachineOperand Lo1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub0));
+ MachineOperand Lo2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub0));
+ MachineOperand Hi1(getSubOperand64(I.getOperand(1), HalfRC, AMDGPU::sub1));
+ MachineOperand Hi2(getSubOperand64(I.getOperand(2), HalfRC, AMDGPU::sub1));
+
+ Register DstLo = MRI.createVirtualRegister(&HalfRC);
+ Register DstHi = MRI.createVirtualRegister(&HalfRC);
+
+ if (IsSALU) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
+ .add(Lo1)
+ .add(Lo2);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
+ .add(Hi1)
+ .add(Hi2);
+ } else {
+ const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
+ Register CarryReg = MRI.createVirtualRegister(CarryRC);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADD_I32_e64), DstLo)
+ .addDef(CarryReg)
+ .add(Lo1)
+ .add(Lo2)
+ .addImm(0);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
+ .addDef(MRI.createVirtualRegister(CarryRC), RegState::Dead)
+ .add(Hi1)
+ .add(Hi2)
+ .addReg(CarryReg, RegState::Kill)
+ .addImm(0);
+ }
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), I.getOperand(0).getReg())
- .addReg(DstLo)
- .addImm(AMDGPU::sub0)
- .addReg(DstHi)
- .addImm(AMDGPU::sub1);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
- for (MachineOperand &MO : I.explicit_operands()) {
- if (!MO.isReg() || TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
- RBI.constrainGenericRegister(MO.getReg(), AMDGPU::SReg_64RegClass, MRI);
- }
+ if (!RBI.constrainGenericRegister(DstReg, RC, MRI) ||
+ !RBI.constrainGenericRegister(I.getOperand(1).getReg(), RC, MRI) ||
+ !RBI.constrainGenericRegister(I.getOperand(2).getReg(), RC, MRI))
+ return false;
I.eraseFromParent();
return true;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s
+
+---
+name: gep_p0_sgpr_sgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; GFX6-LABEL: name: gep_p0_sgpr_sgpr
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+ ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+ ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GFX6: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX8-LABEL: name: gep_p0_sgpr_sgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+ ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+ ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GFX8: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: gep_p0_sgpr_sgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX9: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+ ; GFX9: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+ ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GFX9: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_sgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GFX10-WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+ ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_sgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY2]], [[COPY3]], implicit-def $scc
+ ; GFX10-WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY4]], [[COPY5]], implicit-def $scc, implicit $scc
+ ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[S_ADD_U32_]], %subreg.sub0, [[S_ADDC_U32_]], %subreg.sub1
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ %0:sgpr(p0) = COPY $sgpr0_sgpr1
+ %1:sgpr(s64) = COPY $sgpr2_sgpr3
+ %2:sgpr(p0) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: gep_p0_vgpr_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GFX6-LABEL: name: gep_p0_vgpr_vgpr
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX8-LABEL: name: gep_p0_vgpr_vgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: gep_p0_vgpr_vgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE64-LABEL: name: gep_p0_vgpr_vgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE32-LABEL: name: gep_p0_vgpr_vgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+ ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ %0:vgpr(p0) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = COPY $vgpr2_vgpr3
+ %2:vgpr(p0) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: gep_p0_sgpr_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; GFX6-LABEL: name: gep_p0_sgpr_vgpr
+ ; GFX6: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+ ; GFX6: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX6: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX6: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX6: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX8-LABEL: name: gep_p0_sgpr_vgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+ ; GFX8: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX8: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX8: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX8: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX9-LABEL: name: gep_p0_sgpr_vgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+ ; GFX9: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX9: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX9: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX9: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX9: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX9: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX9: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX9: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE64-LABEL: name: gep_p0_sgpr_vgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE64: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE64: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE64: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE64: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX10-WAVE64: %8:vgpr_32, dead %10:sreg_64_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10-WAVE64: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ ; GFX10-WAVE32-LABEL: name: gep_p0_sgpr_vgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $sgpr0_sgpr1
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GFX10-WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+ ; GFX10-WAVE32: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+ ; GFX10-WAVE32: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+ ; GFX10-WAVE32: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+ ; GFX10-WAVE32: [[V_ADD_I32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_I32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY2]], [[COPY3]], 0, implicit $exec
+ ; GFX10-WAVE32: %8:vgpr_32, dead %10:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY4]], [[COPY5]], killed [[V_ADD_I32_e64_1]], 0, implicit $exec
+ ; GFX10-WAVE32: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_I32_e64_]], %subreg.sub0, %8, %subreg.sub1
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+ %0:vgpr(p0) = COPY $sgpr0_sgpr1
+ %1:vgpr(s64) = COPY $vgpr0_vgpr1
+ %2:vgpr(p0) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: gep_p3_sgpr_sgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; GFX6-LABEL: name: gep_p3_sgpr_sgpr
+ ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ ; GFX8-LABEL: name: gep_p3_sgpr_sgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX8: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX8: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ ; GFX9-LABEL: name: gep_p3_sgpr_sgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_sgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX10-WAVE64: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_sgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+ ; GFX10-WAVE32: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[S_ADD_U32_]]
+ %0:sgpr(p3) = COPY $sgpr0
+ %1:sgpr(s32) = COPY $sgpr1
+ %2:sgpr(p3) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: gep_p3_vgpr_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; GFX6-LABEL: name: gep_p3_vgpr_vgpr
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX6: S_ENDPGM 0, implicit %2
+ ; GFX8-LABEL: name: gep_p3_vgpr_vgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX9-LABEL: name: gep_p3_vgpr_vgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ ; GFX10-WAVE64-LABEL: name: gep_p3_vgpr_vgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ ; GFX10-WAVE32-LABEL: name: gep_p3_vgpr_vgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ %0:vgpr(p3) = COPY $vgpr0
+ %1:vgpr(s32) = COPY $vgpr1
+ %2:vgpr(p3) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: gep_p3_sgpr_vgpr
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; GFX6-LABEL: name: gep_p3_sgpr_vgpr
+ ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+ ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX6: %2:vgpr_32, dead %3:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX6: S_ENDPGM 0, implicit %2
+ ; GFX8-LABEL: name: gep_p3_sgpr_vgpr
+ ; GFX8: $vcc_hi = IMPLICIT_DEF
+ ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+ ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX8: %2:vgpr_32, dead %3:sreg_32_xm0_xexec = V_ADD_I32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX8: S_ENDPGM 0, implicit %2
+ ; GFX9-LABEL: name: gep_p3_sgpr_vgpr
+ ; GFX9: $vcc_hi = IMPLICIT_DEF
+ ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+ ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX9: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ ; GFX10-WAVE64-LABEL: name: gep_p3_sgpr_vgpr
+ ; GFX10-WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+ ; GFX10-WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-WAVE64: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX10-WAVE64: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ ; GFX10-WAVE32-LABEL: name: gep_p3_sgpr_vgpr
+ ; GFX10-WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; GFX10-WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $sgpr0
+ ; GFX10-WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX10-WAVE32: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
+ ; GFX10-WAVE32: S_ENDPGM 0, implicit [[V_ADD_U32_e64_]]
+ %0:vgpr(p3) = COPY $sgpr0
+ %1:vgpr(s32) = COPY $vgpr0
+ %2:vgpr(p3) = G_GEP %0, %1
+ S_ENDPGM 0, implicit %2
+
+...
regBankSelected: true
# GCN: body:
-# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+# GCN: [[PTR:%[0-9]+]]:sreg_64_xexec = COPY $sgpr0_sgpr1
# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
# Max immediate for CI
# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4294967292
# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 3
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0
# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
# GCN: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4
-# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# GCN-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# GCN-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# GCN: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# GCN-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# GCN-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# GCN: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# Max 32-bit byte offset
# Overflow 32-bit byte offset
# SIVI: [[K_LO:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
# SIVI: [[K_HI:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
-# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
-# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sgpr_32 = COPY [[K]].sub0
-# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub0
-# SIVI: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
-# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sgpr_32 = COPY [[K]].sub1
-# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sgpr_32 = COPY [[PTR]].sub1
+# SIVI: [[K:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
+# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
+# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
+# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
+# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
+# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
-# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
+# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0