From edeaa0dc91445c62b7cfa171eeed72fd78c3ce30 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 16 Jul 2019 19:27:44 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Fix selection of private stores git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366249 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/BUFInstructions.td | 13 +- .../GlobalISel/inst-select-store-private.mir | 280 ++++++++++++++++++ 2 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 5b6c8a7ed96..62a19d848af 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -1568,17 +1568,18 @@ defm : MUBUFStore_Pattern ; multiclass MUBUFScratchStorePat { + ValueType vt, PatFrag st, + RegisterClass rc = VGPR_32> { def : GCNPat < (st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset)), - (InstrOffen $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; def : GCNPat < (st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset)), - (InstrOffset $value, $srsrc, $soffset, $offset, 0, 0, 0, 0) + (InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0) >; } @@ -1587,9 +1588,9 @@ defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; -defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; +defm : MUBUFScratchStorePat ; let OtherPredicates = [D16PreservesUnusedBits] in { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir new file mode 100644 index 00000000000..822a1412d16 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -0,0 +1,280 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + +--- + +name: store_private_s32_to_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_4 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_4 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_s32_to_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_2 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_2 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 2, align 2, addrspace 5) + +... + +--- + +name: store_private_s32_to_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_s32_to_1 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_v2s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_v2s16 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](<2 x s16>), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p3 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p3 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p3), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_p5 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: store_private_p5 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX6: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + ; GFX9-LABEL: name: store_private_p5 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p5) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY $vgpr1 + ; GFX9: G_STORE [[COPY]](p5), [[COPY1]](p5) :: (store 4, addrspace 5) + %0:vgpr(p5) = COPY $vgpr0 + %1:vgpr(p5) = COPY $vgpr1 + G_STORE %0, %1 :: (store 4, align 4, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_fi_offset_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:vgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(p5) = G_GEP %0, %1 + %3:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %3, %2 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_constant_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4095 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4095 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... + +--- + +name: store_private_s32_to_1_constant_4096 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + ; GFX9-LABEL: name: store_private_s32_to_1_constant_4096 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec + ; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5) + %0:vgpr(p5) = G_CONSTANT i32 4096 + %1:vgpr(s32) = G_CONSTANT i32 0 + G_STORE %1, %0 :: (store 1, align 1, addrspace 5) + +... -- 2.40.0