From: Matt Arsenault Date: Mon, 15 Jul 2019 18:25:24 +0000 (+0000) Subject: AMDGPU/GlobalISel: Handle llvm.amdgcn.if.break X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=92d15388333ff8838a47c3efd7d788f9a596ad9e;p=llvm AMDGPU/GlobalISel: Handle llvm.amdgcn.if.break git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366102 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 6fa3e7baf23..317a9b5c08c 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -475,6 +475,31 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC( case Intrinsic::minnum: case Intrinsic::amdgcn_cvt_pkrtz: return selectImpl(I, CoverageInfo); + case Intrinsic::amdgcn_if_break: { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + // FIXME: Manually selecting to avoid dealing with the SReg_1 trick + // SelectionDAG uses for wave32 vs wave64. 
+ BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK)) + .add(I.getOperand(0)) + .add(I.getOperand(2)) + .add(I.getOperand(3)); + + Register DstReg = I.getOperand(0).getReg(); + Register Src0Reg = I.getOperand(2).getReg(); + Register Src1Reg = I.getOperand(3).getReg(); + + I.eraseFromParent(); + + for (Register Reg : { DstReg, Src0Reg, Src1Reg }) { + if (!MRI.getRegClassOrNull(Reg)) + MRI.setRegClass(Reg, TRI.getWaveMaskRegClass()); + } + + return true; + } default: return selectImpl(I, CoverageInfo); } diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 2bbc259e8fc..be05d9cb0ec 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1985,6 +1985,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, SrcSize); break; } + case Intrinsic::amdgcn_if_break: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1); + OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); + break; + } } break; } diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll new file mode 100644 index 00000000000..282441a2a1d --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { +; GCN-LABEL: test_wave32: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 +; GCN-NEXT: 
s_load_dword s1, s[4:5], 0x24 +; GCN-NEXT: ; implicit-def: $vcc_hi +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s0, 0 +; GCN-NEXT: s_cselect_b32 s0, -1, 0 +; GCN-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 +; GCN-NEXT: s_or_b32 s0, s0, s1 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt_vscnt null, 0x0 +; GCN-NEXT: flat_store_dword v[0:1], v0 +; GCN-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %arg0, 0 + %break = call i32 @llvm.amdgcn.if.break.i32(i1 %cond, i32 %saved) + store volatile i32 %break, i32 addrspace(1)* undef + ret void +} + +declare i32 @llvm.amdgcn.if.break.i32(i1, i32) diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll new file mode 100644 index 00000000000..77d1374c86c --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) { +; GCN-LABEL: test_wave64: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s2, s[4:5], 0x0 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_eq_u32 s2, 0 +; GCN-NEXT: s_cselect_b32 s2, -1, 0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 +; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: flat_store_dwordx2 v[0:1], v[0:1] +; GCN-NEXT: s_endpgm +entry: + %cond = icmp eq i32 %arg0, 0 + %break = call i64 @llvm.amdgcn.if.break.i64(i1 %cond, i64 %saved) + store volatile i64 %break, i64 addrspace(1)* undef + ret void +} + +declare i64 @llvm.amdgcn.if.break.i64(i1, i64)