From 37f081b80d35231f4b1528c92d54809da65f3ae7 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 13 Jul 2018 22:16:03 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Implement select() for 32-bit @llvm.minnum and @llvm.maxnum Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, llvm-commits, t-tye Differential Revision: https://reviews.llvm.org/D46172 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337056 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUGISel.td | 17 +++++ .../AMDGPU/AMDGPUInstructionSelector.cpp | 2 + .../AMDGPU/GlobalISel/inst-select-maxnum.mir | 66 +++++++++++++++++++ .../AMDGPU/GlobalISel/inst-select-minnum.mir | 65 ++++++++++++++++++ 4 files changed, 150 insertions(+) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index fd1b29384ce..ba735390f67 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -85,6 +85,18 @@ class GISelVop3Pat2CommutePat < (inst src0_vt:$src1, src1_vt:$src0) >; +class GISelVop3Pat2ModsPat < + SDPatternOperator node, + Instruction inst, + ValueType dst_vt, + ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat < + + (dst_vt (node (src0_vt (VOP3Mods0 src0_vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omods)), + (src1_vt (VOP3Mods src1_vt:$src1, i32:$src1_modifiers)))), + (inst i32:$src0_modifiers, src0_vt:$src0, + i32:$src1_modifiers, src1_vt:$src1, $clamp, $omods) +>; + multiclass GISelVop2IntrPat < SDPatternOperator node, Instruction inst, ValueType dst_vt, ValueType src_vt = dst_vt> { @@ -119,3 +131,8 @@ def : GISelVop3Pat2CommutePat ; // this is even supported yet. 
defm : GISelVop2IntrPat < int_amdgcn_cvt_pkrtz, V_CVT_PKRTZ_F16_F32_e32, v2f16, f32>; + +defm : GISelVop2IntrPat ; +def : GISelVop3Pat2ModsPat ; +defm : GISelVop2IntrPat ; +def : GISelVop3Pat2ModsPat ; diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 7db6edc5575..219d430fbb3 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -186,6 +186,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I, switch (IntrinsicID) { default: break; + case Intrinsic::maxnum: + case Intrinsic::minnum: case Intrinsic::amdgcn_cvt_pkrtz: return selectImpl(I, CoverageInfo); diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir new file mode 100644 index 00000000000..a473259201d --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-maxnum.mir @@ -0,0 +1,66 @@ +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +--- | + define void @maxnum(i32 addrspace(1)* %global0) { ret void } +... 
+--- + +name: maxnum +legalized: true +regBankSelected: true + +# GCN-LABEL: name: maxnum +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 + ; GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[VGPR1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s64) = COPY $vgpr3_vgpr4 + + ; GCN: [[SGPR64_0:%[0-9]+]]:sreg_64_xexec = COPY $sgpr10_sgpr11 + ; GCN: [[VGPR64_0:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 + ; GCN: [[VGPR64_1:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 + %10:sgpr(s64) = COPY $sgpr10_sgpr11 + %11:vgpr(s64) = COPY $vgpr10_vgpr11 + %12:vgpr(s64) = COPY $vgpr12_vgpr13 + + ; maxnum vs + ; GCN: V_MAX_F32_e32 [[SGPR0]], [[VGPR0]] + %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %1, %0 + + ; maxnum sv + ; GCN: V_MAX_F32_e32 [[SGPR0]], [[VGPR0]] + %5:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %0, %1 + + ; maxnum vv + ; GCN: V_MAX_F32_e32 [[VGPR0]], [[VGPR1]] + %6:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.maxnum.f32), %1, %2 + + G_STORE %4, %3 :: (store 4 into %ir.global0) + G_STORE %5, %3 :: (store 4 into %ir.global0) + G_STORE %6, %3 :: (store 4 into %ir.global0) + + ; 64-bit + + ; maxnum sv + ; GCN: V_MAX_F64 0, [[SGPR64_0]], 0, [[VGPR64_0]], 0, 0 + %14:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %10, %11 + + ; maxnum vs + ; GCN: V_MAX_F64 0, [[VGPR64_0]], 0, [[SGPR64_0]], 0, 0 + %15:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %11, %10 + + ; maxnum vv + ; GCN: V_MAX_F64 0, [[VGPR64_0]], 0, [[VGPR64_1]], 0, 0 + %16:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.maxnum.f64), %11, %12 + + G_STORE %14, %3 :: (store 8 into %ir.global0) + G_STORE %15, %3 :: (store 8 into %ir.global0) + G_STORE %16, %3 :: (store 8 into %ir.global0) + +... 
+--- diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir new file mode 100644 index 00000000000..0bfe9bb7217 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-minnum.mir @@ -0,0 +1,65 @@ +# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN + +--- | + define void @minnum(i32 addrspace(1)* %global0) { ret void } +... +--- + +name: minnum +legalized: true +regBankSelected: true + +# GCN-LABEL: name: minnum +body: | + bb.0: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr3_vgpr4, $sgpr10_sgpr11, $vgpr10_vgpr11, $vgpr12_vgpr13 + ; GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0 + ; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN: [[VGPR1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = COPY $vgpr1 + %3:vgpr(s64) = COPY $vgpr3_vgpr4 + + ; GCN: [[SGPR64_0:%[0-9]+]]:sreg_64_xexec = COPY $sgpr10_sgpr11 + ; GCN: [[VGPR64_0:%[0-9]+]]:vreg_64 = COPY $vgpr10_vgpr11 + ; GCN: [[VGPR64_1:%[0-9]+]]:vreg_64 = COPY $vgpr12_vgpr13 + %10:sgpr(s64) = COPY $sgpr10_sgpr11 + %11:vgpr(s64) = COPY $vgpr10_vgpr11 + %12:vgpr(s64) = COPY $vgpr12_vgpr13 + + ; minnum vs + ; GCN: V_MIN_F32_e32 [[SGPR0]], [[VGPR0]] + %4:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %1, %0 + + ; minnum sv + ; GCN: V_MIN_F32_e32 [[SGPR0]], [[VGPR0]] + %5:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %0, %1 + + ; minnum vv + ; GCN: V_MIN_F32_e32 [[VGPR0]], [[VGPR1]] + %6:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.minnum.f32), %1, %2 + + G_STORE %4, %3 :: (store 4 into %ir.global0) + G_STORE %5, %3 :: (store 4 into %ir.global0) + G_STORE %6, %3 :: (store 4 into %ir.global0) + + ; 64-bit + + ; minnum sv + ; GCN: V_MIN_F64 0, [[SGPR64_0]], 0, [[VGPR64_0]], 0, 0 + %14:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %10, %11 + + ; minnum vs + ; GCN: V_MIN_F64 0, [[VGPR64_0]], 0, 
[[SGPR64_0]], 0, 0 + %15:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %11, %10 + + ; minnum vv + ; GCN: V_MIN_F64 0, [[VGPR64_0]], 0, [[VGPR64_1]], 0, 0 + %16:vgpr(s64) = G_INTRINSIC intrinsic(@llvm.minnum.f64), %11, %12 + + G_STORE %14, %3 :: (store 8 into %ir.global0) + G_STORE %15, %3 :: (store 8 into %ir.global0) + G_STORE %16, %3 :: (store 8 into %ir.global0) +... +--- -- 2.50.1