}
}
-static bool isInstrUniform(const MachineInstr &MI) {
+// Return true if MI is a uniform memory access of at least dword (4-byte)
+// width, i.e. a candidate for the scalar (SMRD) load path.  Sub-dword
+// extending loads are deliberately excluded (hence "NonExtLoad") and fall
+// through to the VGPR mapping — see the extload_* regbank tests.
+static bool isInstrUniformNonExtLoad(const MachineInstr &MI) {
if (!MI.hasOneMemOperand())
return false;
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return AMDGPUInstrInfo::isUniformMMO(MMO);
+ // Reject accesses narrower than 32 bits: even with a uniform address they
+ // must not be treated as scalar-load candidates.
+ return MMO->getSize() >= 4 && AMDGPUInstrInfo::isUniformMMO(MMO);
}
RegisterBankInfo::InstructionMappings
unsigned PtrSize = PtrTy.getSizeInBits();
unsigned AS = PtrTy.getAddressSpace();
LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
- if (isInstrUniform(MI) &&
+ if (isInstrUniformNonExtLoad(MI) &&
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
const InstructionMapping &SSMapping = getInstructionMapping(
1, 1, getOperandsMapping(
const ValueMapping *ValMapping;
const ValueMapping *PtrMapping;
- if (isInstrUniform(MI) &&
+ if (isInstrUniformNonExtLoad(MI) &&
(AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) {
// We have a uniform instruction so we want to use an SMRD load
ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void}
define amdgpu_kernel void @load_local_uniform() { ret void }
define amdgpu_kernel void @load_region_uniform() { ret void }
+ define amdgpu_kernel void @extload_constant_i8_to_i32_uniform() { ret void }
+ define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
+ define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
+ define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
...
---
-name : load_global_v8i32_non_uniform
+name: load_global_v8i32_non_uniform
legalized: true
body: |
...
---
-name : load_global_v4i64_non_uniform
+name: load_global_v4i64_non_uniform
legalized: true
body: |
...
---
-name : load_global_v16i32_non_uniform
+name: load_global_v16i32_non_uniform
legalized: true
body: |
%1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32)
...
-name : load_global_v8i64_non_uniform
+name: load_global_v8i64_non_uniform
legalized: true
body: |
...
---
-name : load_global_v8i32_uniform
+name: load_global_v8i32_uniform
legalized: true
body: |
...
---
-name : load_global_v4i64_uniform
+name: load_global_v4i64_uniform
legalized: true
body: |
...
---
-name : load_global_v16i32_uniform
+name: load_global_v16i32_uniform
legalized: true
body: |
...
---
-name : load_global_v8i64_uniform
+name: load_global_v8i64_uniform
legalized: true
body: |
...
---
-name : load_constant_v8i32_non_uniform
+name: load_constant_v8i32_non_uniform
legalized: true
body: |
...
---
-name : load_constant_v4i64_non_uniform
+name: load_constant_v4i64_non_uniform
legalized: true
body: |
...
---
-name : load_constant_v16i32_non_uniform
+name: load_constant_v16i32_non_uniform
legalized: true
body: |
...
---
-name : load_constant_v8i64_non_uniform
+name: load_constant_v8i64_non_uniform
legalized: true
body: |
...
---
-name : load_constant_v8i32_uniform
+name: load_constant_v8i32_uniform
legalized: true
body: |
...
---
-name : load_constant_v4i64_uniform
+name: load_constant_v4i64_uniform
legalized: true
body: |
...
---
-name : load_constant_v16i32_uniform
+name: load_constant_v16i32_uniform
legalized: true
body: |
...
---
-name : load_constant_v8i64_uniform
+name: load_constant_v8i64_uniform
legalized: true
body: |
...
---
-name : load_local_uniform
+name: load_local_uniform
legalized: true
body: |
bb.0:
...
---
-name : load_region_uniform
+name: load_region_uniform
legalized: true
body: |
bb.0:
%1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5)
...
+
+---
+name: extload_constant_i8_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; Uniform (SGPR pointer) s8-to-s32 extending load from the constant
+    ; address space: must not take the scalar-load path — the pointer is
+    ; copied to a VGPR and the load result gets the VGPR bank.
+    ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4)
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1)
+...
+
+---
+name: extload_global_i8_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; Uniform s8-to-s32 extending load from global memory (addrspace 1):
+    ; expected to stay on the VGPR path rather than being mapped scalar.
+    ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}}
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 1)
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1)
+...
+
+---
+name: extload_constant_i16_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; Uniform s16-to-s32 extending load from the constant address space:
+    ; 2-byte access is below the 4-byte scalar-load threshold, so the
+    ; result must be assigned the VGPR bank.
+    ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4)
+
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2)
+...
+
+---
+name: extload_global_i16_to_i32_uniform
+legalized: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; Uniform s16-to-s32 extending load from global memory (addrspace 1):
+    ; like the constant-space case, the sub-dword access keeps the VGPR
+    ; mapping despite the uniform SGPR pointer.
+    ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform
+    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
+    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
+    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 1)
+
+    %0:_(p4) = COPY $sgpr0_sgpr1
+    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
+...