[AMDGPU] DPP combiner: recognize identities for more opcodes

author Jay Foad <jay.foad@gmail.com>

Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)

committer Jay Foad <jay.foad@gmail.com>

Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)
author Jay Foad <jay.foad@gmail.com>
Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)
committer Jay Foad <jay.foad@gmail.com>
Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)
diff --git a/lib/Target/AMDGPU/GCNDPPCombine.cpp b/lib/Target/AMDGPU/GCNDPPCombine.cpp

index 536dc54a65c593a4bf3ac40d4ab22c9a0904478d..7348b5b56c8b37da9212ea012b6aeb489e3c3d1a 100644 (file)
--- a/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -253,33 +253,46 @@ static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
    switch (OrigMIOp) {
    default: break;
    case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_U32_e64:
    case AMDGPU::V_ADD_I32_e32:
+  case AMDGPU::V_ADD_I32_e64:
    case AMDGPU::V_OR_B32_e32:
+  case AMDGPU::V_OR_B32_e64:
    case AMDGPU::V_SUBREV_U32_e32:
+  case AMDGPU::V_SUBREV_U32_e64:
    case AMDGPU::V_SUBREV_I32_e32:
+  case AMDGPU::V_SUBREV_I32_e64:
    case AMDGPU::V_MAX_U32_e32:
+  case AMDGPU::V_MAX_U32_e64:
    case AMDGPU::V_XOR_B32_e32:
+  case AMDGPU::V_XOR_B32_e64:
      if (OldOpnd->getImm() == 0)
        return true;
      break;
    case AMDGPU::V_AND_B32_e32:
+  case AMDGPU::V_AND_B32_e64:
    case AMDGPU::V_MIN_U32_e32:
+  case AMDGPU::V_MIN_U32_e64:
      if (static_cast<uint32_t>(OldOpnd->getImm()) ==
          std::numeric_limits<uint32_t>::max())
        return true;
      break;
    case AMDGPU::V_MIN_I32_e32:
+  case AMDGPU::V_MIN_I32_e64:
      if (static_cast<int32_t>(OldOpnd->getImm()) ==
          std::numeric_limits<int32_t>::max())
        return true;
      break;
    case AMDGPU::V_MAX_I32_e32:
+  case AMDGPU::V_MAX_I32_e64:
      if (static_cast<int32_t>(OldOpnd->getImm()) ==
          std::numeric_limits<int32_t>::min())
        return true;
      break;
    case AMDGPU::V_MUL_I32_I24_e32:
+  case AMDGPU::V_MUL_I32_I24_e64:
    case AMDGPU::V_MUL_U32_U24_e32:
+  case AMDGPU::V_MUL_U32_U24_e64:
      if (OldOpnd->getImm() == 1)
        return true;
      break;
diff --git a/test/CodeGen/AMDGPU/dpp_combine.mir b/test/CodeGen/AMDGPU/dpp_combine.mir

index fbeadcf2e0081701216781c3ef1e981a164c2ae3..d98cde5cff4b32b1aee5884e339703aaec994030 100644 (file)
--- a/test/CodeGen/AMDGPU/dpp_combine.mir
+++ b/test/CodeGen/AMDGPU/dpp_combine.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -mcpu=tonga  -run-pass=gcn-dpp-combine  -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=gcn-dpp-combine -o - %s | FileCheck %s
  
  ---
  # old is undefined: only combine when masks are fully enabled and
@@ -328,6 +328,30 @@ body:             |
      %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $exec
  ...
  
+# check for e64 modifiers
+# CHECK-LABEL: name: add_u32_e64
+# CHECK: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
+# CHECK: %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
+
+name: add_u32_e64
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+
+    ; this should be combined as all modifiers are default
+    %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
+
+    ; this shouldn't be combined as clamp is set
+    %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
+...
+
  # tests on sequences of dpp consumers
  # CHECK-LABEL: name: dpp_seq
  # CHECK: %4:vgpr_32 = V_ADD_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit-def $vcc, implicit $exec
author	Jay Foad <jay.foad@gmail.com>
	Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)
committer	Jay Foad <jay.foad@gmail.com>
	Fri, 5 Jul 2019 14:52:48 +0000 (14:52 +0000)
lib/Target/AMDGPU/GCNDPPCombine.cpp		patch | blob | history
test/CodeGen/AMDGPU/dpp_combine.mir		patch | blob | history