From dfa7d6f913122d77ae0aae6575e253a14855a1c5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 1 Aug 2019 00:53:38 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Select local loads git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@367498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUGISel.td | 12 + .../AMDGPU/AMDGPUInstructionSelector.cpp | 84 +- lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 9 +- lib/Target/AMDGPU/AMDGPUInstructions.td | 2 + lib/Target/AMDGPU/SIInstrInfo.td | 10 +- .../GlobalISel/inst-select-load-local.mir | 906 ++++++++++++++++++ 6 files changed, 1014 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir diff --git a/lib/Target/AMDGPU/AMDGPUGISel.td b/lib/Target/AMDGPU/AMDGPUGISel.td index cad4c2ef404..2b493935cd0 100644 --- a/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/lib/Target/AMDGPU/AMDGPUGISel.td @@ -64,6 +64,18 @@ def gi_mubuf_scratch_offen : GIComplexOperandMatcher, GIComplexPatternEquiv; +def gi_ds_1addr_1offset : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + + +// Separate load nodes are defined to glue m0 initialization in +// SelectionDAG. The GISel selector can just insert m0 initialization +// directly before selecting a glue-less load, so hide this +// distinction. +def : GINodeEquiv; + + class GISelSop2Pat < SDPatternOperator node, diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 5db2e6fb4e6..06ba7d53c76 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1243,10 +1243,22 @@ bool AMDGPUInstructionSelector::hasVgprParts(ArrayRef AddrInfo) const { return false; } -bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { - // TODO: Can/should we insert m0 initialization here for DS instructions and - // call the normal selector? 
- return false; +bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I, + CodeGenCoverage &CoverageInfo) const { + MachineBasicBlock *BB = I.getParent(); + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + + const LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); + unsigned AS = PtrTy.getAddressSpace(); + if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) && + STI.ldsRequiresM0Init()) { + // If DS instructions require M0 initialization, insert it before selecting. + BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addImm(-1); + } + + return selectImpl(I, CoverageInfo); } bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const { @@ -1364,7 +1376,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I, return true; return selectImpl(I, CoverageInfo); case TargetOpcode::G_LOAD: - return selectImpl(I, CoverageInfo); + return selectG_LOAD(I, CoverageInfo); case TargetOpcode::G_SELECT: return selectG_SELECT(I); case TargetOpcode::G_STORE: @@ -1698,6 +1710,22 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { }}}; } +bool AMDGPUInstructionSelector::isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, + unsigned OffsetBits) const { + if ((OffsetBits == 16 && !isUInt<16>(Offset)) || + (OffsetBits == 8 && !isUInt<8>(Offset))) + return false; + + if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled()) + return true; + + // On Southern Islands instructions with a negative base value and an offset + // don't seem to work. 
+ return signBitIsZero(Base, MRI); +} + InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectMUBUFScratchOffset( MachineOperand &Root) const { @@ -1726,3 +1754,49 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffset( [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset }}; } + +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectDS1Addr1Offset(MachineOperand &Root) const { + MachineInstr *MI = Root.getParent(); + MachineBasicBlock *MBB = MI->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + const MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); + if (!RootDef) { + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; + } + + int64_t ConstAddr = 0; + if (isBaseWithConstantOffset(Root, MRI)) { + const MachineOperand &LHS = RootDef->getOperand(1); + const MachineOperand &RHS = RootDef->getOperand(2); + const MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); + const MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); + if (LHSDef && RHSDef) { + int64_t PossibleOffset = + RHSDef->getOperand(1).getCImm()->getSExtValue(); + if (isDSOffsetLegal(MRI, LHS, PossibleOffset, 16)) { + // (add n0, c0) + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(PossibleOffset); } + }}; + } + } + } else if (RootDef->getOpcode() == AMDGPU::G_SUB) { + + + + } else if (mi_match(Root.getReg(), MRI, m_ICst(ConstAddr))) { + + + } + + return {{ + [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, + [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } + }}; +} diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 4f489ddfb23..8ec241bac49 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -90,7 +90,7 @@ private: void getAddrModeInfo(const MachineInstr &Load, const 
MachineRegisterInfo &MRI, SmallVectorImpl &AddrInfo) const; bool selectSMRD(MachineInstr &I, ArrayRef AddrInfo) const; - bool selectG_LOAD(MachineInstr &I) const; + bool selectG_LOAD(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; bool selectG_SELECT(MachineInstr &I) const; bool selectG_STORE(MachineInstr &I) const; bool selectG_BRCOND(MachineInstr &I) const; @@ -133,6 +133,13 @@ private: InstructionSelector::ComplexRendererFns selectMUBUFScratchOffset(MachineOperand &Root) const; + bool isDSOffsetLegal(const MachineRegisterInfo &MRI, + const MachineOperand &Base, + int64_t Offset, unsigned OffsetBits) const; + + InstructionSelector::ComplexRendererFns + selectDS1Addr1Offset(MachineOperand &Root) const; + const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 0be9ec3eff1..07f5123ea51 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -480,11 +480,13 @@ def atomic_store_local : LocalStore ; def load_align8_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { let IsLoad = 1; + let IsNonExtLoad = 1; let MinAlignment = 8; } def load_align16_local : PatFrag <(ops node:$ptr), (load_local node:$ptr)> { let IsLoad = 1; + let IsNonExtLoad = 1; let MinAlignment = 16; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index a035bb63e7c..92d6ab30bf9 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -328,13 +328,13 @@ def AMDGPUatomic_ld_glue : SDNode <"ISD::ATOMIC_LOAD", SDTAtomicLoad, >; def unindexedload_glue : PatFrag <(ops node:$ptr), (AMDGPUld_glue node:$ptr)> { - let IsUnindexed = 1; let IsLoad = 1; + let IsUnindexed = 1; } def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> { - let IsNonExtLoad = 1; let IsLoad = 1; + let IsNonExtLoad = 1; } def atomic_load_32_glue : PatFrag<(ops 
node:$ptr), @@ -396,7 +396,9 @@ def sextloadi16_glue : PatFrag<(ops node:$ptr), (sextload_glue node:$ptr)> { let IsLoad = 1, AddressSpaces = LoadAddress_local.AddrSpaces in { -def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)>; +def load_local_m0 : PatFrag<(ops node:$ptr), (load_glue node:$ptr)> { + let IsNonExtLoad = 1; +} let MemoryVT = i8 in { def extloadi8_local_m0 : PatFrag<(ops node:$ptr), (extloadi8_glue node:$ptr)>; @@ -412,9 +414,11 @@ def zextloadi16_local_m0 : PatFrag<(ops node:$ptr), (zextloadi16_glue node:$ptr) def load_align8_local_m0 : LoadFrag , LocalAddress { let MinAlignment = 8; + let IsNonExtLoad = 1; } def load_align16_local_m0 : LoadFrag , LocalAddress { let MinAlignment = 16; + let IsNonExtLoad = 1; } } // End IsLoad = 1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir new file mode 100644 index 00000000000..602b1141ed7 --- /dev/null +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local.mir @@ -0,0 +1,906 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s + + +--- + +name: load_local_s32_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + 
+body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX7-LABEL: name: load_local_s32_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B32_:%[0-9]+]]:vgpr_32 = DS_READ_B32 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_B32_]] + ; GFX9-LABEL: name: load_local_s32_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_B32_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_s32_from_2 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_2 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX7-LABEL: name: load_local_s32_from_2 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U16_:%[0-9]+]]:vgpr_32 = DS_READ_U16 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 2, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U16_]] + ; GFX9-LABEL: name: load_local_s32_from_2 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 2, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U16_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_s32_from_1 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + scratchWaveOffsetReg: $sgpr4 + stackPtrOffsetReg: $sgpr32 + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_v2s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX6-LABEL: name: load_local_v2s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX7-LABEL: name: load_local_v2s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_B64_:%[0-9]+]]:vreg_64 = DS_READ_B64 [[COPY]], 0, 0, implicit $m0, implicit $exec :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[DS_READ_B64_]] + ; GFX9-LABEL: name: load_local_v2s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[COPY]], 0, 0, implicit $exec :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[DS_READ_B64_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_v2s32_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX10: $vgpr0 = COPY [[GLOBAL_LOAD_DWORDX2_]] + ; GFX6-LABEL: name: load_local_v2s32_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX7-LABEL: name: load_local_v2s32_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + ; GFX9-LABEL: name: load_local_v2s32_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x s32>) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s32>) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_v3s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v3s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX7-LABEL: name: load_local_v3s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + ; GFX9-LABEL: name: load_local_v3s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(<3 x s32>) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](<3 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<3 x s32>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_local_v4s32 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v4s32 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX7-LABEL: name: load_local_v4s32 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + ; GFX9-LABEL: name: load_local_v4s32 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<4 x s32>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<4 x s32>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_local_s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_local_s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_s64_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s64_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX7-LABEL: name: load_local_s64_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + ; GFX9-LABEL: name: load_local_s64_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(s64) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](s64) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s64) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_v2s64 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2s64 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX7-LABEL: name: load_local_v2s64 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + ; GFX9-LABEL: name: load_local_v2s64 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x s64>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_local_v2p1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2p1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX7-LABEL: name: load_local_v2p1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + ; GFX9-LABEL: name: load_local_v2p1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_local_s96 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s96 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX7-LABEL: name: load_local_s96 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + ; GFX9-LABEL: name: load_local_s96 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_96(s96) = G_LOAD [[COPY]](p3) :: (load 12, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[LOAD]](s96) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s96) = G_LOAD %0 :: (load 12, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2 = COPY %1 + +... 
+ +--- + +name: load_local_s128 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s128 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX7-LABEL: name: load_local_s128 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + ; GFX9-LABEL: name: load_local_s128 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s128) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +--- + +name: load_local_p3_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p3_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-LABEL: name: load_local_p3_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_local_p3_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_p5_from_4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p5_from_4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](p3) + ; GFX7-LABEL: name: load_local_p5_from_4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](p3) + ; GFX9-LABEL: name: load_local_p5_from_4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(p3) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](p3) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p3) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_p1_align8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p1_align8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-LABEL: name: load_local_p1_align8 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_local_p1_align8 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_p1_align4 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p1_align4 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX7-LABEL: name: load_local_p1_align4 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + ; GFX9-LABEL: name: load_local_p1_align4 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p1) = G_LOAD [[COPY]](p3) :: (load 8, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p1) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p1) = G_LOAD %0 :: (load 8, align 4, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_p999_from_8 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_p999_from_8 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX7-LABEL: name: load_local_p999_from_8 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + ; GFX9-LABEL: name: load_local_p999_from_8 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(p999) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](p999) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(p999) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_v2p3 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2p3 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX7-LABEL: name: load_local_v2p3 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + ; GFX9-LABEL: name: load_local_v2p3 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<2 x p3>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<2 x p3>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x p3>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... 
+ +--- + +name: load_local_v2s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v2s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX6: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX7-LABEL: name: load_local_v2s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX7: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; GFX9-LABEL: name: load_local_v2s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vgpr_32(<2 x s16>) = G_LOAD [[COPY]](p3) :: (load 4, addrspace 3) + ; GFX9: $vgpr0 = COPY [[LOAD]](<2 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<2 x s16>) = G_LOAD %0 :: (load 4, align 4, addrspace 3) + $vgpr0 = COPY %1 + +... 
+ +--- + +name: load_local_v4s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v4s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX6: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX7-LABEL: name: load_local_v4s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX7: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + ; GFX9-LABEL: name: load_local_v4s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_64(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load 8, addrspace 3) + ; GFX9: $vgpr0_vgpr1 = COPY [[LOAD]](<4 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<4 x s16>) = G_LOAD %0 :: (load 8, align 8, addrspace 3) + $vgpr0_vgpr1 = COPY %1 + +... + +# --- + +# name: load_local_v6s16 +# legalized: true +# regBankSelected: true +# tracksRegLiveness: true +# machineFunctionInfo: +# scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 +# scratchWaveOffsetReg: $sgpr4 +# stackPtrOffsetReg: $sgpr32 + +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:vgpr(p3) = COPY $vgpr0 +# %1:vgpr(<6 x s16>) = G_LOAD %0 :: (load 12, align 4, addrspace 3) +# $vgpr0_vgpr1_vgpr2 = COPY %1 + +# ... 
+ +--- + +name: load_local_v8s16 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_v8s16 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX7-LABEL: name: load_local_v8s16 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX7: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + ; GFX9-LABEL: name: load_local_v8s16 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0 + ; GFX9: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load 16, align 4, addrspace 3) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>) + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load 16, align 4, addrspace 3) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 + +... 
+ +################################################################################ +### Stress addressing modes +################################################################################ + +--- + +name: load_local_s32_from_1_gep_65535 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 [[COPY]], 65535, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_65535 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[COPY]], 65535, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 65535 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_local_s32_from_1_gep_65536 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_65536 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65536, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 65536 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +... 
+ +--- + +name: load_local_s32_from_1_gep_m1 +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; GFX6-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX6: liveins: $vgpr0 + ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX6: $m0 = S_MOV_B32 -1 + ; GFX6: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX6: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX7-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX7: liveins: $vgpr0 + ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX7: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX7: $m0 = S_MOV_B32 -1 + ; GFX7: [[DS_READ_U8_:%[0-9]+]]:vgpr_32 = DS_READ_U8 %2, 0, 0, implicit $m0, implicit $exec :: (load 1, addrspace 3) + ; GFX7: $vgpr0 = COPY [[DS_READ_U8_]] + ; GFX9-LABEL: name: load_local_s32_from_1_gep_m1 + ; GFX9: liveins: $vgpr0 + ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec + ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec + ; GFX9: [[DS_READ_U8_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[V_ADD_U32_e64_]], 0, 0, implicit $exec :: (load 1, addrspace 3) + ; GFX9: $vgpr0 = COPY [[DS_READ_U8_gfx9_]] + %0:vgpr(p3) = COPY $vgpr0 + %1:vgpr(s32) = G_CONSTANT i32 -1 + %2:vgpr(p3) = G_GEP %0, %1 + %3:vgpr(s32) = G_LOAD %2 :: (load 1, align 1, addrspace 3) + $vgpr0 = COPY %3 + +... -- 2.40.0