From: Matt Arsenault Date: Sat, 29 Jun 2019 00:33:13 +0000 (+0000) Subject: AMDGPU/GlobalISel: RegBankSelect for some DS intrinsics X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7846e1a4c86ed67c375e63af995b801f2d970068;p=llvm AMDGPU/GlobalISel: RegBankSelect for some DS intrinsics git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364698 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index d2e7e4da705..a58b9958c47 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1042,7 +1042,11 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const { SmallVector OpdsMapping(MI.getNumOperands()); for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - unsigned Size = getSizeInBits(MI.getOperand(I).getReg(), MRI, *TRI); + const MachineOperand &Op = MI.getOperand(I); + if (!Op.isReg()) + continue; + + unsigned Size = getSizeInBits(Op.getReg(), MRI, *TRI); OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); } @@ -1503,6 +1507,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_udot8: case Intrinsic::amdgcn_fdiv_fast: return getDefaultMappingVOP(MI); + case Intrinsic::amdgcn_ds_permute: + case Intrinsic::amdgcn_ds_bpermute: + return getDefaultMappingAllVGPR(MI); case Intrinsic::amdgcn_kernarg_segment_ptr: case Intrinsic::amdgcn_s_getpc: case Intrinsic::amdgcn_groupstaticsize: { @@ -1592,6 +1599,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case Intrinsic::amdgcn_ds_append: + case Intrinsic::amdgcn_ds_consume: + case Intrinsic::amdgcn_ds_fadd: + case Intrinsic::amdgcn_ds_fmin: + case Intrinsic::amdgcn_ds_fmax: + return getDefaultMappingAllVGPR(MI); + case Intrinsic::amdgcn_ds_ordered_add: + case Intrinsic::amdgcn_ds_ordered_swap: + return getInvalidInstructionMapping(); case Intrinsic::amdgcn_exp_compr: OpdsMapping[0] = nullptr; // IntrinsicID // FIXME: These are immediate values which can't be read from registers. diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir new file mode 100644 index 00000000000..0267f7612de --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.append.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_append_s +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ds_append_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY1]](p3), 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 + +... + +--- +name: ds_append_v +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ds_append_v + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), [[COPY]](p3), 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.append), %0, 0 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir new file mode 100644 index 00000000000..996a01a2c70 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.bpermute.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_bpermute_ss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ds_bpermute_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), [[COPY2]](s32), [[COPY3]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.bpermute), %0, %1 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir new file mode 100644 index 00000000000..50dd920210b --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.consume.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_consume_s +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0 + ; CHECK-LABEL: name: ds_consume_s + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY1]](p3), 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 + +... + +--- +name: ds_consume_v +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; CHECK-LABEL: name: ds_consume_v + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), [[COPY]](p3), 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.consume), %0, 0 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir new file mode 100644 index 00000000000..5415dbff59a --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmax.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_fmax_ss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ds_fmax_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmax_sv +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: ds_fmax_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmax_vs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; CHECK-LABEL: name: ds_fmax_vs + ; CHECK: liveins: $vgpr0, $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmax_vv +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: ds_fmax_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmax), %0, %1, 0, 0, 0 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir new file mode 100644 index 00000000000..0043de5c445 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.fmin.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_fmin_ss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ds_fmin_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY3]](s32), 0, 0, 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmin_sv +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: ds_fmin_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY2]](p3), [[COPY1]](s32), 0, 0, 0 + %0:_(p3) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmin_vs +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; CHECK-LABEL: name: ds_fmin_vs + ; CHECK: liveins: $vgpr0, $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY2]](s32), 0, 0, 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 + +... + +--- +name: ds_fmin_vv +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: ds_fmin_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), [[COPY]](p3), [[COPY1]](s32), 0, 0, 0 + %0:_(p3) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.fmin), %0, %1, 0, 0, 0 + +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir new file mode 100644 index 00000000000..f6229057ea9 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.ds.permute.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: ds_permute_ss +legalized: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: ds_permute_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), [[COPY2]](s32), [[COPY3]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.permute), %0, %1 + +...