From b2bf7e8c160beb7f02ebebdc7a36673b220193cd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 9 Sep 2019 16:03:45 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Fix regbankselect for uniform extloads There are no scalar extloads. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8 +- .../AMDGPU/GlobalISel/regbankselect-load.mir | 103 +++++++++++++++--- 2 files changed, 89 insertions(+), 22 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 651ce9596d9..5414dd241ae 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -320,12 +320,12 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( } } -static bool isInstrUniform(const MachineInstr &MI) { +static bool isInstrUniformNonExtLoad(const MachineInstr &MI) { if (!MI.hasOneMemOperand()) return false; const MachineMemOperand *MMO = *MI.memoperands_begin(); - return AMDGPUInstrInfo::isUniformMMO(MMO); + return MMO->getSize() >= 4 && AMDGPUInstrInfo::isUniformMMO(MMO); } RegisterBankInfo::InstructionMappings @@ -426,7 +426,7 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( unsigned PtrSize = PtrTy.getSizeInBits(); unsigned AS = PtrTy.getAddressSpace(); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - if (isInstrUniform(MI) && + if (isInstrUniformNonExtLoad(MI) && (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) { const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( @@ -1482,7 +1482,7 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if (isInstrUniform(MI) && + if (isInstrUniformNonExtLoad(MI) && (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index fd4d47e979b..1370ed3fa5f 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -61,13 +61,17 @@ define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void} define amdgpu_kernel void @load_local_uniform() { ret void } define amdgpu_kernel void @load_region_uniform() { ret void } + define amdgpu_kernel void @extload_constant_i8_to_i32_uniform() { ret void } + define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void } + define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void } + define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void } declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } ... --- -name : load_global_v8i32_non_uniform +name: load_global_v8i32_non_uniform legalized: true body: | @@ -102,7 +106,7 @@ body: | ... --- -name : load_global_v4i64_non_uniform +name: load_global_v4i64_non_uniform legalized: true body: | @@ -129,7 +133,7 @@ body: | ... --- -name : load_global_v16i32_non_uniform +name: load_global_v16i32_non_uniform legalized: true body: | @@ -185,7 +189,7 @@ body: | %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32) ... -name : load_global_v8i64_non_uniform +name: load_global_v8i64_non_uniform legalized: true body: | @@ -226,7 +230,7 @@ body: | ... --- -name : load_global_v8i32_uniform +name: load_global_v8i32_uniform legalized: true body: | @@ -239,7 +243,7 @@ body: | ... --- -name : load_global_v4i64_uniform +name: load_global_v4i64_uniform legalized: true body: | @@ -252,7 +256,7 @@ body: | ... --- -name : load_global_v16i32_uniform +name: load_global_v16i32_uniform legalized: true body: | @@ -265,7 +269,7 @@ body: | ... --- -name : load_global_v8i64_uniform +name: load_global_v8i64_uniform legalized: true body: | @@ -278,7 +282,7 @@ body: | ... --- -name : load_constant_v8i32_non_uniform +name: load_constant_v8i32_non_uniform legalized: true body: | @@ -313,7 +317,7 @@ body: | ... --- -name : load_constant_v4i64_non_uniform +name: load_constant_v4i64_non_uniform legalized: true body: | @@ -340,7 +344,7 @@ body: | ... --- -name : load_constant_v16i32_non_uniform +name: load_constant_v16i32_non_uniform legalized: true body: | @@ -397,7 +401,7 @@ body: | ... --- -name : load_constant_v8i64_non_uniform +name: load_constant_v8i64_non_uniform legalized: true body: | @@ -438,7 +442,7 @@ body: | ... --- -name : load_constant_v8i32_uniform +name: load_constant_v8i32_uniform legalized: true body: | @@ -451,7 +455,7 @@ body: | ... --- -name : load_constant_v4i64_uniform +name: load_constant_v4i64_uniform legalized: true body: | @@ -464,7 +468,7 @@ body: | ... --- -name : load_constant_v16i32_uniform +name: load_constant_v16i32_uniform legalized: true body: | @@ -477,7 +481,7 @@ body: | ... --- -name : load_constant_v8i64_uniform +name: load_constant_v8i64_uniform legalized: true body: | @@ -490,7 +494,7 @@ body: | ... --- -name : load_local_uniform +name: load_local_uniform legalized: true body: | bb.0: @@ -505,7 +509,7 @@ body: | ... --- -name : load_region_uniform +name: load_region_uniform legalized: true body: | bb.0: @@ -519,3 +523,66 @@ body: | %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5) ... + +--- +name: extload_constant_i8_to_i32_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform + ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: %2:vgpr(p4) = COPY %0(p4) + ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1) +... + +--- +name: extload_global_i8_to_i32_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}} + ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: %2:vgpr(p4) = COPY %0(p4) + ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 1) + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1) +... + +--- +name: extload_constant_i16_to_i32_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform + ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: %2:vgpr(p4) = COPY %0(p4) + ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4) + + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2) +... + +--- +name: extload_global_i16_to_i32_uniform +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform + ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK: %2:vgpr(p4) = COPY %0(p4) + ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 1) + + %0:_(p4) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2) +... -- 2.40.0