return new SILowerControlFlowPass(tm);
}
+static bool isDS(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case AMDGPU::DS_ADD_U32_RTN:
+ case AMDGPU::DS_SUB_U32_RTN:
+ case AMDGPU::DS_WRITE_B32:
+ case AMDGPU::DS_WRITE_B8:
+ case AMDGPU::DS_WRITE_B16:
+ case AMDGPU::DS_READ_B32:
+ case AMDGPU::DS_READ_I8:
+ case AMDGPU::DS_READ_U8:
+ case AMDGPU::DS_READ_I16:
+ case AMDGPU::DS_READ_U16:
+ return true;
+ }
+}
+
bool SILowerControlFlowPass::shouldSkip(MachineBasicBlock *From,
MachineBasicBlock *To) {
Next = llvm::next(I);
MachineInstr &MI = *I;
+ if (isDS(MI.getOpcode())) {
+ NeedM0 = true;
+ NeedWQM = true;
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
IndirectDst(MI);
break;
- case AMDGPU::DS_READ_B32:
- NeedWQM = true;
- // Fall through
- case AMDGPU::DS_WRITE_B32:
- case AMDGPU::DS_ADD_U32_RTN:
- NeedM0 = true;
- break;
-
case AMDGPU::V_INTERP_P1_F32:
case AMDGPU::V_INTERP_P2_F32:
case AMDGPU::V_INTERP_MOV_F32:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
%1 = load i8 addrspace(3)* %in
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v2i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-LABEL: @load_v4i8_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i8_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
; R600-CHECK: ASHR
; SI-CHECK-LABEL: @load_i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v2i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v2i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-LABEL: @load_v4i16_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; R600-CHECK-DAG: ASHR
; SI-CHECK-LABEL: @load_v4i16_sext_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
ret void
}
-; load an i32 value from the glocal address space.
+; load an i32 value from the local address space.
; R600-CHECK-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_i32_local
; SI-CHECK-NOT: S_WQM_B64
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
ret void
}
-; load a f32 value from the global address space.
+; load a f32 value from the local address space.
; R600-CHECK-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK-LABEL: @load_v2f32_local
+; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
; SI-CHECK: DS_READ_B32
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {