From ae17beb3c17f8d7a83a62f48ac8773ef2a77dbd6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 29 Jan 2019 18:13:02 +0000 Subject: [PATCH] GlobalISel: Fix narrowScalar for load/store with different mem size This was ignoring the memory size, and producing multiple loads/stores if the operand size was different from the memory size. I assume this is the intent of not having an explicit G_ANYEXTLOAD (although I think that would probably be better). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 29 +++++++- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 24 ++++++- .../AMDGPU/GlobalISel/legalize-load.mir | 68 +++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-store.mir | 64 +++++++++++++++++ 4 files changed, 181 insertions(+), 4 deletions(-) diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index fef1605fecf..7e3c0ed809c 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -537,6 +537,18 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; const auto &MMO = **MI.memoperands_begin(); + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + if (8 * MMO.getSize() != DstTy.getSizeInBits()) { + unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO); + MIRBuilder.buildAnyExt(DstReg, TmpReg); + MI.eraseFromParent(); + return Legalized; + } + // This implementation doesn't work for atomics. Give up instead of doing // something invalid. 
if (MMO.getOrdering() != AtomicOrdering::NotAtomic || @@ -566,8 +578,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, DstRegs.push_back(DstReg); } - unsigned DstReg = MI.getOperand(0).getReg(); - if(MRI.getType(DstReg).isVector()) + + if (DstTy.isVector()) MIRBuilder.buildBuildVector(DstReg, DstRegs); else MIRBuilder.buildMerge(DstReg, DstRegs); @@ -608,6 +620,19 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return UnableToLegalize; const auto &MMO = **MI.memoperands_begin(); + + unsigned SrcReg = MI.getOperand(0).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + if (8 * MMO.getSize() != SrcTy.getSizeInBits()) { + unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy); + auto &MMO = **MI.memoperands_begin(); + MIRBuilder.buildTrunc(TmpReg, SrcReg); + MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO); + MI.eraseFromParent(); + return Legalized; + } + // This implementation doesn't work for atomics. Give up instead of doing // something invalid. 
if (MMO.getOrdering() != AtomicOrdering::NotAtomic || diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9e6fa061c10..1338ffed6e0 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -229,12 +229,31 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, }); getActionDefinitionsBuilder({G_LOAD, G_STORE}) + .narrowScalarIf([](const LegalityQuery &Query) { + unsigned Size = Query.Types[0].getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + return (Size > 32 && MemSize < Size); + }, + [](const LegalityQuery &Query) { + return std::make_pair(0, LLT::scalar(32)); + }) .legalIf([=, &ST](const LegalityQuery &Query) { const LLT &Ty0 = Query.Types[0]; + unsigned Size = Ty0.getSizeInBits(); + unsigned MemSize = Query.MMODescrs[0].SizeInBits; + if (Size > 32 && MemSize < Size) + return false; + + if (Ty0.isVector() && Size != MemSize) + return false; + // TODO: Decompose private loads into 4-byte components. // TODO: Illegal flat loads on SI - switch (Ty0.getSizeInBits()) { + switch (MemSize) { + case 8: + case 16: + return Size == 32; case 32: case 64: case 128: @@ -250,7 +269,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, default: return false; } - }); + }) + .clampScalar(0, S32, S64); auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD}) diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir index cf25192ca6a..57f485b3dae 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-load.mir @@ -129,3 +129,71 @@ body: | $vgpr0_vgpr1_vgpr2 = COPY %1 ... 
+ +--- +name: test_ext_load_global_s64_from_1_align1 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_ext_load_global_s64_from_1_align1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_LOAD %0 :: (load 1, addrspace 1, align 4) + + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_ext_load_global_s64_from_2_align2 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_ext_load_global_s64_from_2_align2 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_LOAD %0 :: (load 2, addrspace 1, align 4) + + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_ext_load_global_s64_from_4_align4 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_ext_load_global_s64_from_4_align4 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_LOAD %0 :: (load 4, addrspace 1, align 4) + + $vgpr0_vgpr1 = COPY %1 +... 
+
+---
+name: test_ext_load_global_s128_from_4_align4
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_ext_load_global_s128_from_4_align4
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[LOAD]](s32)
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[ANYEXT]](s128)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = G_LOAD %0 :: (load 4, addrspace 1, align 4)
+
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
+...
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
index cb4f83296d2..4f9abca6066 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir
@@ -120,3 +120,67 @@ body: |
     %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4
     G_STORE %1, %0 :: (store 12, align 4, addrspace 1)
 ...
+
+---
+name: test_truncstore_global_s64_to_s8
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s8
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 1, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 1, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 2, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s64_to_s32
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; CHECK-LABEL: name: test_truncstore_global_s64_to_s32
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 4, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s64) = COPY $vgpr2_vgpr3
+    G_STORE %1, %0 :: (store 4, addrspace 1)
+...
+
+---
+name: test_truncstore_global_s128_to_s16
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: test_truncstore_global_s128_to_s16
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s128)
+    ; CHECK: G_STORE [[TRUNC]](s32), [[COPY]](p1) :: (store 2, addrspace 1)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s128) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    G_STORE %1, %0 :: (store 2, addrspace 1)
+...
-- 
2.40.0