const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
- if (Reg == AMDGPU::SCC)
- return true;
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return false;
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg));
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
const TargetRegisterClass *RC =
return RB->getID() == AMDGPU::SCCRegBankID;
}
+static bool isVCC(unsigned Reg, const MachineRegisterInfo &MRI,
+ const SIRegisterInfo &TRI) {
+ assert(!TargetRegisterInfo::isPhysicalRegister(Reg)); // Physical registers must not be passed in.
+ // Returns true when Reg is VCC-like, judged from its assigned register class if it has one, otherwise from its register bank.
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ const TargetRegisterClass *RC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass*>(); // Null when no register class is stored for Reg.
+ if (RC) { // Class assigned: Reg qualifies only with the wave mask class AND a 1-bit type.
+ return RC == TRI.getWaveMaskRegClass() &&
+ MRI.getType(Reg).getSizeInBits() == 1;
+ }
+ // No class assigned: fall back to comparing the register bank ID against the VCC bank.
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>(); // NOTE(review): assumes a bank is always set when no class is -- confirm.
+ return RB->getID() == AMDGPU::VCCRegBankID;
+}
+
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
const MachineOperand &Src = I.getOperand(1);
unsigned SrcReg = Src.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
- unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
- unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
+ unsigned DstReg = I.getOperand(0).getReg();
- // We have a copy from a 32-bit to 64-bit register. This happens
- // when we are selecting scc->vcc copies.
- if (DstSize == 64) {
+ // Specially handle scc->vcc copies.
+ if (isVCC(DstReg, MRI, TRI)) {
const DebugLoc &DL = I.getDebugLoc();
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
.addImm(0)
.addReg(SrcReg);
if (!MRI.getRegClassOrNull(SrcReg))
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
+# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -run-pass=instruction-select -verify-machineinstrs -global-isel -o - %s | FileCheck -check-prefix=WAVE32 %s
---
body: |
bb.0:
liveins: $sgpr2_sgpr3
- ; GCN-LABEL: name: copy
- ; GCN: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
- ; GCN: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
- ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ ; WAVE64-LABEL: name: copy
+ ; WAVE64: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; WAVE64: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+ ; WAVE64: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; WAVE64: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; WAVE32-LABEL: name: copy
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr2_sgpr3
+ ; WAVE32: [[COPY1:%[0-9]+]]:vreg_64 = COPY [[COPY]]
+ ; WAVE32: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; WAVE32: FLAT_STORE_DWORD [[COPY1]], [[DEF]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
...
---
-name: copy_vcc_scc
+name: copy_vcc_bank_scc_bank
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
- ; GCN-LABEL: name: copy_vcc_scc
- ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
- ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ ; WAVE64-LABEL: name: copy_vcc_bank_scc_bank
+ ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
...
---
-name: copy_vcc_scc_2_uses
+name: copy_vcc_bank_scc_bank_2_uses
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
- ; GCN-LABEL: name: copy_vcc_scc_2_uses
- ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
- ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
- ; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
- ; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
- ; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
- ; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+
+ ; WAVE64-LABEL: name: copy_vcc_bank_scc_bank_2_uses
+ ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; WAVE64: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; WAVE64: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE64: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; WAVE32-LABEL: name: copy_vcc_bank_scc_bank_2_uses
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
+ ; WAVE32: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
+ ; WAVE32: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
+ ; WAVE32: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
+ ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%7:vgpr(s32) = G_SELECT %6, %1, %5
G_STORE %7, %0 :: (store 4, addrspace 1)
...
+
---
+
+name: copy_vcc_bank_scc_physreg
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
+
+ ; WAVE64-LABEL: name: copy_vcc_bank_scc_physreg
+ ; WAVE64: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE64: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY $scc
+ ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
+ ; WAVE64: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ ; WAVE32-LABEL: name: copy_vcc_bank_scc_physreg
+ ; WAVE32: $vcc_hi = IMPLICIT_DEF
+ ; WAVE32: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32_xm0_xexec = COPY $scc
+ ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[COPY3]], implicit $exec
+ ; WAVE32: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+ %0:vgpr(p1) = COPY $vgpr0_vgpr1
+ %1:vgpr(s32) = COPY $vgpr2
+ %2:vgpr(s32) = COPY $vgpr3
+ %3:vcc(s1) = COPY $scc
+ %5:vgpr(s32) = G_SELECT %3, %1, %2
+ G_STORE %5, %0 :: (store 4, addrspace 1)
+...