From: Matt Arsenault Date: Mon, 1 Jul 2019 16:27:32 +0000 (+0000) Subject: AMDGPU/GlobalISel: Try to select VOP3 form of add X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a7bea5605ca1c30cc38223bc8bebe5f6df53c1da;p=llvm AMDGPU/GlobalISel: Try to select VOP3 form of add There are several things broken, but at least emit the right thing for gfx9. The import of the pattern with the unused carry out seems to not work. Needs a special class for clamp, because OperandWithDefaultOps doesn't really work. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@364804 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index accb8eac9f0..ac2951ca532 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -70,6 +70,17 @@ class GISelVop2Pat < (inst src0_vt:$src0, src1_vt:$src1) >; +// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken. +class GISelVop2ClampingPat < + SDPatternOperator node, + Instruction inst, + ValueType dst_vt, + ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat < + + (dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))), + (inst src0_vt:$src0, src1_vt:$src1, 0) +>; + class GISelVop2CommutePat < SDPatternOperator node, Instruction inst, @@ -129,7 +140,16 @@ def : GISelSop2Pat ; def : GISelVop2Pat ; def : GISelSop2Pat ; + +let SubtargetPredicate = NotHasAddNoCarryInsts in { +// FIXME: This should use the VOP3 form +//def : GISelVop2ClampingPat ; def : GISelVop2Pat ; +} + +let SubtargetPredicate = HasAddNoCarryInsts in { +def : GISelVop2ClampingPat ; +} def : GISelSop2Pat ; let AddedComplexity = 100 in { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir index f2db55685b5..8f7973d4eca 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-add.mir @@ -1,24 +1,37 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- -name: add_i32 +name: add_s32 legalized: true regBankSelected: true body: | bb.0: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4 - ; GCN-LABEL: name: add_i32 - ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 - ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 - ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 - ; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc - ; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec - ; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec - ; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec - ; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + + ; GFX6-LABEL: name: add_s32 + ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec + ; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec + ; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec + ; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9-LABEL: name: add_s32 + ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1 + ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4 + ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec + ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec + ; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr + %0:sgpr(s32) = COPY $sgpr0 %1:sgpr(s32) = COPY $sgpr1 %2:vgpr(s32) = COPY $vgpr0