Summary:
Any instruction with the DPP modifier may leave certain lanes of its
output unwritten if bound_ctrl=1 is set or if any bits in bank_mask or
row_mask aren't set, so the destination register is effectively both
defined and modified.
The right way to handle this is to add a constraint that the destination
register is the same as one of the inputs. We could tie the destination
to the first source, but that would be too restrictive for some use
cases where we want the destination to hold some other value before the
instruction executes. Instead, add a fake "old" source and tie it to the
destination. Effectively, the "old" source defines the value that
unwritten lanes will get. We'll expose this functionality to users with
a new intrinsic later.
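
To make the role of the "old" operand concrete, here is a minimal C++
sketch of the per-lane semantics of a DPP move with a quad_perm control,
assuming a 64-lane wave. quadPermSrcLane and dppMovLane are illustrative
names, not code from this patch, and bound_ctrl handling is elided:

  #include <cstdint>

  // A quad_perm DPP control (the 0x00..0xFF encodings): each lane reads
  // from a lane within its own group of four, selected by two control bits.
  static unsigned quadPermSrcLane(unsigned Ctrl, unsigned Lane) {
    return (Lane & ~3u) | ((Ctrl >> (2 * (Lane & 3))) & 3u);
  }

  // One lane of a DPP move: lanes whose row or bank is masked off are
  // never written by the hardware, so the result must fall back to the
  // tied "old" operand.
  uint32_t dppMovLane(unsigned Lane, const uint32_t Src[64],
                      const uint32_t Old[64], unsigned QuadPermCtrl,
                      unsigned RowMask, unsigned BankMask) {
    unsigned Row = Lane / 16;        // four rows of 16 lanes per wave
    unsigned Bank = (Lane % 16) / 4; // four banks of four lanes per row
    if (!((RowMask >> Row) & 1) || !((BankMask >> Bank) & 1))
      return Old[Lane];              // unwritten lane keeps the "old" value
    return Src[quadPermSrcLane(QuadPermCtrl, Lane)];
  }
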
Also, we want to use DPP instructions for computing derivatives, which
means we need to set WQM for them. We also need to enable the entire
wavefront when using DPP intrinsics to implement nonuniform subgroup
reductions, since otherwise we'll get incorrect results in some cases.
To accommodate this, add a new operand to all DPP instructions which will
be interpreted by the SI WQM pass. This will be exposed with a new
intrinsic later. We'll also add support for Whole Wavefront Mode later.
I also fixed llvm.amdgcn.mov.dpp to overwrite the source, so that
unwritten lanes now keep the source value, and fixed up the test.
However, I could also keep the old behavior (where unwritten lanes are
undefined) if people want it.
Reviewers: tstellar, arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye
Differential Revision: https://reviews.llvm.org/D34716
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310283 91177308-0d34-0410-b5e6-96231b3b80d8
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
}
+ // All DPP instructions with at least one source operand have a fake "old"
+ // source at the beginning that's tied to the dst operand. Handle it here.
+ if (Desc.getNumOperands() >= 2)
+ Inst.addOperand(Inst.getOperand(0));
+
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
-
- // special case v_mac_{f16, f32}:
- // it has src2 register operand that is tied to dst operand
- if (Inst.getOpcode() == AMDGPU::V_MAC_F32_dpp ||
- Inst.getOpcode() == AMDGPU::V_MAC_F16_dpp) {
- auto it = Inst.begin();
- std::advance(
- it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
- Inst.insert(it, Inst.getOperand(0)); // src2 = dst
- }
}
//===----------------------------------------------------------------------===//
);
}
-class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
+class getInsDPP <RegisterOperand DstRC, RegisterClass Src0RC, RegisterClass Src1RC,
+ int NumSrcArgs, bit HasModifiers,
+ Operand Src0Mod, Operand Src1Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
!if (!eq(NumSrcArgs, 1),
!if (!eq(HasModifiers, 1),
// VOP1_DPP with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ (ins DstRC:$old, Src0Mod:$src0_modifiers,
+ Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
// VOP1_DPP without modifiers
- (ins Src0RC:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
+ (ins DstRC:$old, Src0RC:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* endif */)
/* NumSrcArgs == 2 */,
!if (!eq(HasModifiers, 1),
// VOP2_DPP with modifiers
- (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ (ins DstRC:$old,
+ Src0Mod:$src0_modifiers, Src0RC:$src0,
Src1Mod:$src1_modifiers, Src1RC:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
// VOP2_DPP without modifiers
- (ins Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
- row_mask:$row_mask, bank_mask:$bank_mask,
- bound_ctrl:$bound_ctrl)
+ (ins DstRC:$old,
+ Src0RC:$src0, Src1RC:$src1, dpp_ctrl:$dpp_ctrl,
+ row_mask:$row_mask, bank_mask:$bank_mask,
+ bound_ctrl:$bound_ctrl)
/* endif */)));
}
getOpSelMod<Src0VT>.ret,
getOpSelMod<Src1VT>.ret,
getOpSelMod<Src2VT>.ret>.ret;
- field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
+ field dag InsDPP = getInsDPP<DstRCDPP, Src0DPP, Src1DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP>.ret;
field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
HasSDWAOMod, Src0ModSDWA, Src1ModSDWA,
let Outs = (outs);
let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
- let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ let InsDPP = (ins DstRC:$vdst, DstRC:$old, Src0RC32:$src0,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0,
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
bits<8> vdst;
let Inst{8-0} = 0xfa; // dpp
def : Pat <
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
imm:$bound_ctrl)),
- (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
- (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
+ (V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
+ (as_i32imm $row_mask), (as_i32imm $bank_mask),
+ (as_i1imm $bound_ctrl))
>;
-
def : Pat<
(i32 (anyext i16:$src)),
(COPY $src)
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret;
- let InsDPP = (ins Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0ModDPP:$src0_modifiers, Src0DPP:$src0,
Src1ModDPP:$src1_modifiers, Src1DPP:$src1,
- VGPR_32:$src2, // stub argument
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
dst_sel:$dst_sel, dst_unused:$dst_unused,
src0_sel:$src0_sel, src1_sel:$src1_sel);
- let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
+ let InsDPP = (ins DstRCDPP:$old,
+ Src0Mod:$src0_modifiers, Src0DPP:$src0,
Src1Mod:$src1_modifiers, Src1DPP:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
let Uses = ps.Uses;
let SchedRW = ps.SchedRW;
let hasSideEffects = ps.hasSideEffects;
- let Constraints = ps.Constraints;
- let DisableEncoding = ps.DisableEncoding;
bits<8> vdst;
bits<8> src1;
let AssemblerPredicate = !if(P.HasExt, HasDPP, DisableInst);
let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
AMDGPUAsmVariants.Disable);
+ let Constraints = !if(P.NumSrcArgs, "$old = $vdst", "");
+ let DisableEncoding = !if(P.NumSrcArgs, "$old", "");
let DecoderNamespace = "DPP";
}
body: |
bb.0:
%vgpr0 = V_MOV_B32_e32 0, implicit %exec
- %vgpr1 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec
+ %vgpr1 = V_MOV_B32_dpp %vgpr1, %vgpr0, 0, 15, 15, 0, implicit %exec
S_BRANCH %bb.1
bb.1:
implicit %exec, implicit %vcc = V_CMPX_EQ_I32_e32 %vgpr0, %vgpr1, implicit %exec
- %vgpr3 = V_MOV_B32_dpp %vgpr0, 0, 15, 15, 0, implicit %exec
+ %vgpr3 = V_MOV_B32_dpp %vgpr3, %vgpr0, 0, 15, 15, 0, implicit %exec
S_ENDPGM
...
---
; VI-LABEL: {{^}}dpp_test:
; VI: v_mov_b32_e32 v0, s{{[0-9]+}}
+; VI-NOOPT: v_mov_b32_e32 v1, s{{[0-9]+}}
; VI: s_nop 1
-; VI: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
+; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11]
+; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11]
define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) {
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
store i32 %tmp0, i32 addrspace(1)* %out
ret void
}
; VI-LABEL: {{^}}dpp_wait_states:
+; VI-NOOPT: v_mov_b32_e32 [[VGPR1:v[0-9]+]], s{{[0-9]+}}
; VI: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
; VI: s_nop 1
-; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-OPT: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-NOOPT: v_mov_b32_dpp [[VGPR1]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
; VI: s_nop 1
-; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
+; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
%tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0
%tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0