From: Matt Arsenault Date: Thu, 11 May 2017 17:38:33 +0000 (+0000) Subject: AMDGPU: Remove tfe bit from flat instruction definitions X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9f4e5a06c66eb4e804d630337e98435446dade93;p=llvm AMDGPU: Remove tfe bit from flat instruction definitions We don't use it and it was removed in gfx9, and the encoding bit repurposed. Additionally actually using it requires changing the output register class, which wasn't done anyway. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302814 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ccae36ced1f..7c99752b881 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -136,8 +136,7 @@ private: bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset, SDValue &ImmOffset, SDValue &VOffset) const; - bool SelectFlat(SDValue Addr, SDValue &VAddr, - SDValue &SLC, SDValue &TFE) const; + bool SelectFlat(SDValue Addr, SDValue &VAddr, SDValue &SLC) const; bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, bool &Imm) const; @@ -1278,10 +1277,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset, bool AMDGPUDAGToDAGISel::SelectFlat(SDValue Addr, SDValue &VAddr, - SDValue &SLC, - SDValue &TFE) const { + SDValue &SLC) const { VAddr = Addr; - TFE = SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); + SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1); return true; } diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 8867ed689a3..a7eac080f88 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -127,9 +127,9 @@ bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const { .add(I.getOperand(1)) .add(I.getOperand(0)) .addImm(0) - .addImm(0) .addImm(0); + // Now that we selected an opcode, we need to constrain the register // operands to use appropriate classes. bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); @@ -393,7 +393,6 @@ bool AMDGPUInstructionSelector::selectG_LOAD(MachineInstr &I) const { .add(I.getOperand(0)) .addReg(PtrReg) .addImm(0) - .addImm(0) .addImm(0); bool Ret = constrainSelectedInstRegOperands(*Flat, TII, TRI, RBI); diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index b0ac0e689a0..8ba9efd42c7 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -def FLATAtomic : ComplexPattern; +def FLATAtomic : ComplexPattern; //===----------------------------------------------------------------------===// // FLAT classes @@ -62,7 +62,9 @@ class FLAT_Real op, FLAT_Pseudo ps> : bits<8> vdst; bits<1> slc; bits<1> glc; - bits<1> tfe; + + // We don't use tfe right now, and it was removed in gfx9. + bits<1> tfe = 0; // 15-0 is reserved. let Inst{16} = !if(ps.has_glc, glc, ps.glcValue); @@ -79,8 +81,8 @@ class FLAT_Real op, FLAT_Pseudo ps> : class FLAT_Load_Pseudo : FLAT_Pseudo< opName, (outs regClass:$vdst), - (ins VReg_64:$vaddr, GLC:$glc, slc:$slc, tfe:$tfe), - " $vdst, $vaddr$glc$slc$tfe"> { + (ins VReg_64:$vaddr, GLC:$glc, slc:$slc), + " $vdst, $vaddr$glc$slc"> { let has_data = 0; let mayLoad = 1; } @@ -88,8 +90,8 @@ class FLAT_Load_Pseudo : FLAT_Pseudo< class FLAT_Store_Pseudo : FLAT_Pseudo< opName, (outs), - (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc, tfe:$tfe), - " $vaddr, $vdata$glc$slc$tfe"> { + (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc), + " $vaddr, $vdata$glc$slc"> { let mayLoad = 0; let mayStore = 1; let has_vdst = 0; @@ -105,8 +107,8 @@ multiclass FLAT_Atomic_Pseudo< def "" : FLAT_Pseudo , AtomicNoRet { let mayLoad = 1; @@ -119,10 +121,10 @@ multiclass FLAT_Atomic_Pseudo< def _RTN : FLAT_Pseudo , + (atomic (FLATAtomic i64:$vaddr, i1:$slc), data_vt:$vdata))]>, AtomicNoRet { let mayLoad = 1; let mayStore = 1; @@ -311,30 +313,30 @@ def flat_truncstorei16 : flat_st ; // Patterns for global loads with no offset. class FlatLoadPat : Pat < (vt (node i64:$addr)), - (inst $addr, 0, 0, 0) + (inst $addr, 0, 0) >; class FlatLoadAtomicPat : Pat < (vt (node i64:$addr)), - (inst $addr, 1, 0, 0) + (inst $addr, 1, 0) >; class FlatStorePat : Pat < (node vt:$data, i64:$addr), - (inst $addr, $data, 0, 0, 0) + (inst $addr, $data, 0, 0) >; class FlatStoreAtomicPat : Pat < // atomic store follows atomic binop convention so the address comes // first. (node i64:$addr, vt:$data), - (inst $addr, $data, 1, 0, 0) + (inst $addr, $data, 1, 0) >; class FlatAtomicPat : Pat < (vt (node i64:$addr, data_vt:$data)), - (inst $addr, $data, 0, 0) + (inst $addr, $data, 0) >; let Predicates = [isCIVI] in { diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir index 56a9e7022db..2a3d3887ed6 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir @@ -14,7 +14,7 @@ regBankSelected: true # GCN: global_addrspace # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 -# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0, 0 +# GCN: FLAT_LOAD_DWORD [[PTR]], 0, 0 body: | bb.0: diff --git a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir index ea435725bf2..89be3bde94a 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir @@ -15,7 +15,7 @@ regBankSelected: true # GCN: global_addrspace # GCN: [[PTR:%[0-9]+]] = COPY %vgpr0_vgpr1 # GCN: [[VAL:%[0-9]+]] = COPY %vgpr2 -# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0, 0 +# GCN: FLAT_STORE_DWORD [[PTR]], [[VAL]], 0, 0 body: | bb.0: diff --git a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir index 62b47beb125..bc992ed77ff 100644 --- a/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir +++ b/test/CodeGen/AMDGPU/constant-fold-imm-immreg.mir @@ -219,19 +219,19 @@ body: | %34 = V_MOV_B32_e32 63, implicit %exec %27 = V_AND_B32_e64 %26, %24, implicit %exec - FLAT_STORE_DWORD %37, %27, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %27, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_AND_B32_e64 %24, %26, implicit %exec - FLAT_STORE_DWORD %37, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %29 = V_AND_B32_e32 %26, %24, implicit %exec - FLAT_STORE_DWORD %37, %29, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %29, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %30 = V_AND_B32_e64 %26, %26, implicit %exec - FLAT_STORE_DWORD %37, %30, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %30, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %31 = V_AND_B32_e64 %34, %34, implicit %exec - FLAT_STORE_DWORD %37, %31, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %37, %31, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -407,34 +407,34 @@ body: | %27 = S_MOV_B32 -4 %11 = V_LSHLREV_B32_e64 12, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_LSHLREV_B32_e64 %7, 12, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_LSHL_B32_e64 %7, 12, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %14 = V_LSHL_B32_e64 12, %7, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_LSHL_B32_e64 12, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_LSHL_B32_e64 %6, 12, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_LSHL_B32_e64 %6, 32, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_LSHL_B32_e32 %6, %6, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_LSHLREV_B32_e32 11, %24, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_LSHL_B32_e32 %27, %6, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -615,34 +615,34 @@ body: | %35 = V_MOV_B32_e32 2, implicit %exec %11 = V_ASHRREV_I32_e64 8, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_ASHRREV_I32_e64 %8, %10, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_ASHR_I32_e64 %7, 3, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %14 = V_ASHR_I32_e64 7, %32, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_ASHR_I32_e64 %27, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_ASHR_I32_e64 %6, 4, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_ASHR_I32_e64 %6, %33, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_ASHR_I32_e32 %34, %34, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_ASHRREV_I32_e32 11, %10, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_ASHR_I32_e32 %27, %35, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM @@ -824,34 +824,34 @@ body: | %35 = V_MOV_B32_e32 2, implicit %exec %11 = V_LSHRREV_B32_e64 8, %10, implicit %exec - FLAT_STORE_DWORD %20, %11, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %11, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %12 = V_LSHRREV_B32_e64 %8, %10, implicit %exec - FLAT_STORE_DWORD %20, %12, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %12, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %13 = V_LSHR_B32_e64 %7, 3, implicit %exec - FLAT_STORE_DWORD %20, %13, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %13, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %14 = V_LSHR_B32_e64 7, %32, implicit %exec - FLAT_STORE_DWORD %20, %14, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %14, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %15 = V_LSHR_B32_e64 %27, %24, implicit %exec - FLAT_STORE_DWORD %20, %15, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %15, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %22 = V_LSHR_B32_e64 %6, 4, implicit %exec - FLAT_STORE_DWORD %20, %22, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %22, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %23 = V_LSHR_B32_e64 %6, %33, implicit %exec - FLAT_STORE_DWORD %20, %23, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %23, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %25 = V_LSHR_B32_e32 %34, %34, implicit %exec - FLAT_STORE_DWORD %20, %25, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %25, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %26 = V_LSHRREV_B32_e32 11, %10, implicit %exec - FLAT_STORE_DWORD %20, %26, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %26, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) %28 = V_LSHR_B32_e32 %27, %35, implicit %exec - FLAT_STORE_DWORD %20, %28, 0, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) + FLAT_STORE_DWORD %20, %28, 0, 0, implicit %exec, implicit %flat_scr :: (volatile store 4 into %ir.gep.out) S_ENDPGM diff --git a/test/CodeGen/AMDGPU/inserted-wait-states.mir b/test/CodeGen/AMDGPU/inserted-wait-states.mir index c6fe6debd22..ff9fcd1c693 100644 --- a/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -246,15 +246,15 @@ body: | S_BRANCH %bb.1 bb.1: - FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr + FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr + FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, implicit %exec, implicit %flat_scr %vgpr3 = V_MOV_B32_e32 0, implicit %exec S_ENDPGM diff --git a/test/CodeGen/AMDGPU/limit-coalesce.mir b/test/CodeGen/AMDGPU/limit-coalesce.mir index 106a96e32dc..a0d2d6c097a 100644 --- a/test/CodeGen/AMDGPU/limit-coalesce.mir +++ b/test/CodeGen/AMDGPU/limit-coalesce.mir @@ -57,15 +57,15 @@ body: | %4.sub1 = COPY %3.sub0 undef %5.sub0 = COPY %4.sub1 %5.sub1 = COPY %4.sub0 - FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX2 %vgpr0_vgpr1, killed %5, 0, 0, implicit %exec, implicit %flat_scr %6 = IMPLICIT_DEF undef %7.sub0_sub1 = COPY %6 %7.sub2 = COPY %3.sub0 - FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX3 %vgpr0_vgpr1, killed %7, 0, 0, implicit %exec, implicit %flat_scr %8 = IMPLICIT_DEF undef %9.sub0_sub1_sub2 = COPY %8 %9.sub3 = COPY %3.sub0 - FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORDX4 %vgpr0_vgpr1, killed %9, 0, 0, implicit %exec, implicit %flat_scr ... diff --git a/test/CodeGen/AMDGPU/waitcnt.mir b/test/CodeGen/AMDGPU/waitcnt.mir index 38662e83b35..f754415dccb 100644 --- a/test/CodeGen/AMDGPU/waitcnt.mir +++ b/test/CodeGen/AMDGPU/waitcnt.mir @@ -51,21 +51,21 @@ name: flat_zero_waitcnt body: | bb.0: successors: %bb.1 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_BRANCH %bb.1 bb.1: successors: %bb.2 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_BRANCH %bb.2 bb.2: - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec S_ENDPGM ... @@ -86,11 +86,11 @@ name: single_fallthrough_successor_no_end_block_wait body: | bb.0: successors: %bb.1 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr bb.1: %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec - FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM ... --- @@ -114,15 +114,15 @@ name: single_branch_successor_not_next_block body: | bb.0: successors: %bb.2 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, implicit %exec, implicit %flat_scr S_BRANCH %bb.2 bb.1: - FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr8_vgpr9, %vgpr10, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM bb.2: %vgpr3_vgpr4 = V_LSHLREV_B64 4, %vgpr7_vgpr8, implicit %exec - FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, 0, implicit %exec, implicit %flat_scr + FLAT_STORE_DWORD %vgpr3_vgpr4, %vgpr0, 0, 0, implicit %exec, implicit %flat_scr S_ENDPGM ... diff --git a/test/MC/AMDGPU/flat.s b/test/MC/AMDGPU/flat.s index c6894c35f4d..4e81799fe9f 100644 --- a/test/MC/AMDGPU/flat.s +++ b/test/MC/AMDGPU/flat.s @@ -30,31 +30,6 @@ flat_load_dword v1, v[3:4] glc slc // CI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x00,0x01] // VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] -flat_load_dword v1, v[3:4] glc tfe -// NOSI: error: -// CI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x31,0xdc,0x03,0x00,0x80,0x01] -// VI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x80,0x01] - -flat_load_dword v1, v[3:4] glc slc tfe -// NOSI: error: -// CI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x33,0xdc,0x03,0x00,0x80,0x01] -// VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01] - -flat_load_dword v1, v[3:4] slc -// NOSI: error: -// CI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x00,0x01] -// VI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x00,0x01] - -flat_load_dword v1, v[3:4] slc tfe -// NOSI: error: -// CI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x32,0xdc,0x03,0x00,0x80,0x01] -// VI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x80,0x01] - -flat_load_dword v1, v[3:4] tfe -// NOSI: error: -// CI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x30,0xdc,0x03,0x00,0x80,0x01] -// VI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x80,0x01] - flat_store_dword v[3:4], v1 // NOSI: error: // CIVI: flat_store_dword v[3:4], v1 ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x00,0x00] @@ -67,66 +42,25 @@ flat_store_dword v[3:4], v1 glc slc // NOSI: error: // CIVI: flat_store_dword v[3:4], v1 glc slc ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v[3:4], v1 glc tfe -// NOSI: error: -// CIVI: flat_store_dword v[3:4], v1 glc tfe ; encoding: [0x00,0x00,0x71,0xdc,0x03,0x01,0x80,0x00] - -flat_store_dword v[3:4], v1 glc slc tfe -// NOSI: error: -// CIVI: flat_store_dword v[3:4], v1 glc slc tfe ; encoding: [0x00,0x00,0x73,0xdc,0x03,0x01,0x80,0x00] flat_store_dword v[3:4], v1 slc // NOSI: error: // CIVI: flat_store_dword v[3:4], v1 slc ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x00,0x00] -flat_store_dword v[3:4], v1 slc tfe -// NOSI: error: -// CIVI: flat_store_dword v[3:4], v1 slc tfe ; encoding: [0x00,0x00,0x72,0xdc,0x03,0x01,0x80,0x00] - -flat_store_dword v[3:4], v1 tfe -// NOSI: error: -// CIVI: flat_store_dword v[3:4], v1 tfe ; encoding: [0x00,0x00,0x70,0xdc,0x03,0x01,0x80,0x00] - // FIXME: For atomic instructions, glc must be placed immediately following // the data regiser. These forms aren't currently supported: // flat_atomic_add v1, v[3:4], v5 slc glc -// flat_atomic_add v1, v[3:4], v5 slc glc tfe -// flat_atomic_add v1, v[3:4], v5 slc tfe glc -// flat_atomic_add v1, v[3:4], v5 tfe glc -// flat_atomic_add v[3:4], v5 tfe glc -// flat_atomic_add v1, v[3:4], v5 tfe glc slc -// flat_atomic_add v1, v[3:4], v5 tfe slc glc flat_atomic_add v1 v[3:4], v5 glc slc // NOSI: error: // CI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x00,0x01] // VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01] -flat_atomic_add v1 v[3:4], v5 glc tfe -// NOSI: error: -// CI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0xc9,0xdc,0x03,0x05,0x80,0x01] -// VI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x80,0x01] - -flat_atomic_add v1 v[3:4], v5 glc slc tfe -// NOSI: error: -// CI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0xcb,0xdc,0x03,0x05,0x80,0x01] -// VI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x80,0x01] - flat_atomic_add v[3:4], v5 slc // NOSI: error: // CI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x00,0x00] // VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] -flat_atomic_add v[3:4], v5 slc tfe -// NOSI: error: -// CI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0xca,0xdc,0x03,0x05,0x80,0x00] -// VI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x80,0x00] - -flat_atomic_add v[3:4], v5 tfe -// NOSI: error: -// CI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0xc8,0xdc,0x03,0x05,0x80,0x00] -// VI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0x08,0xdd,0x03,0x05,0x80,0x00] - //===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// diff --git a/test/MC/Disassembler/AMDGPU/flat_vi.txt b/test/MC/Disassembler/AMDGPU/flat_vi.txt index a7013092b49..bcc39507805 100644 --- a/test/MC/Disassembler/AMDGPU/flat_vi.txt +++ b/test/MC/Disassembler/AMDGPU/flat_vi.txt @@ -9,39 +9,15 @@ # VI: flat_load_dword v1, v[3:4] glc slc ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x00,0x01] 0x00 0x00 0x53 0xdc 0x03 0x00 0x00 0x01 -# VI: flat_load_dword v1, v[3:4] glc tfe ; encoding: [0x00,0x00,0x51,0xdc,0x03,0x00,0x80,0x01] -0x00 0x00 0x51 0xdc 0x03 0x00 0x80 0x01 - -# VI: flat_load_dword v1, v[3:4] glc slc tfe ; encoding: [0x00,0x00,0x53,0xdc,0x03,0x00,0x80,0x01] -0x00 0x00 0x53 0xdc 0x03 0x00 0x80 0x01 - # VI: flat_load_dword v1, v[3:4] slc ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x00,0x01] 0x00 0x00 0x52 0xdc 0x03 0x00 0x00 0x01 -# VI: flat_load_dword v1, v[3:4] slc tfe ; encoding: [0x00,0x00,0x52,0xdc,0x03,0x00,0x80,0x01] -0x00 0x00 0x52 0xdc 0x03 0x00 0x80 0x01 - -# VI: flat_load_dword v1, v[3:4] tfe ; encoding: [0x00,0x00,0x50,0xdc,0x03,0x00,0x80,0x01] -0x00 0x00 0x50 0xdc 0x03 0x00 0x80 0x01 - # VI: flat_atomic_add v1, v[3:4], v5 glc slc ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x00,0x01] 0x00 0x00 0x0b 0xdd 0x03 0x05 0x00 0x01 -# VI: flat_atomic_add v1, v[3:4], v5 glc tfe ; encoding: [0x00,0x00,0x09,0xdd,0x03,0x05,0x80,0x01] -0x00 0x00 0x09 0xdd 0x03 0x05 0x80 0x01 - -# VI: flat_atomic_add v1, v[3:4], v5 glc slc tfe ; encoding: [0x00,0x00,0x0b,0xdd,0x03,0x05,0x80,0x01] -0x00 0x00 0x0b 0xdd 0x03 0x05 0x80 0x01 - # VI: flat_atomic_add v[3:4], v5 slc ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x00,0x00] 0x00 0x00 0x0a 0xdd 0x03 0x05 0x00 0x00 -# VI: flat_atomic_add v[3:4], v5 slc tfe ; encoding: [0x00,0x00,0x0a,0xdd,0x03,0x05,0x80,0x00] -0x00 0x00 0x0a 0xdd 0x03 0x05 0x80 0x00 - -# VI: flat_atomic_add v[3:4], v5 tfe ; encoding: [0x00,0x00,0x08,0xdd,0x03,0x05,0x80,0x00] -0x00 0x00 0x08 0xdd 0x03 0x05 0x80 0x00 - # VI: flat_load_ubyte v1, v[3:4] ; encoding: [0x00,0x00,0x40,0xdc,0x03,0x00,0x00,0x01] 0x00 0x00 0x40 0xdc 0x03 0x00 0x00 0x01