]> granicus.if.org Git - llvm/commitdiff
AMDGPU/GlobalISel: Handle more G_INSERT cases
authorMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 7 Oct 2019 19:16:26 +0000 (19:16 +0000)
committerMatt Arsenault <Matthew.Arsenault@amd.com>
Mon, 7 Oct 2019 19:16:26 +0000 (19:16 +0000)
Start manually writing a table to get the subreg index. TableGen
should probably generate this, but I'm not sure what it looks like in
the arbitrary case where subregisters are allowed to not fully cover
the super-registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373947 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
lib/Target/AMDGPU/AMDGPURegisterInfo.h
test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir

index aa165d4ce21e9eaeefd24f0497f79d550112b155..afdeacc42910cec6f7589e9f39de089a091d2fae 100644 (file)
@@ -555,48 +555,6 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
   return false;
 }
 
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
-static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
-                                          unsigned Size, unsigned Offset) {
-  switch (Size) {
-  case 32:
-    return TRI.getSubRegFromChannel(Offset / 32);
-  case 64: {
-    switch (Offset) {
-    case 0:
-      return AMDGPU::sub0_sub1;
-    case 32:
-      return AMDGPU::sub1_sub2;
-    case 64:
-      return AMDGPU::sub2_sub3;
-    case 96:
-      return AMDGPU::sub4_sub5;
-    case 128:
-      return AMDGPU::sub5_sub6;
-    case 160:
-      return AMDGPU::sub7_sub8;
-      // FIXME: Missing cases up to 1024 bits
-    default:
-      return AMDGPU::NoSubRegister;
-    }
-  }
-  case 96: {
-    switch (Offset) {
-    case 0:
-      return AMDGPU::sub0_sub1_sub2;
-    case 32:
-      return AMDGPU::sub1_sub2_sub3;
-    case 64:
-      return AMDGPU::sub2_sub3_sub4;
-    }
-  }
-  default:
-    return AMDGPU::NoSubRegister;
-  }
-}
-
 bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
 
@@ -612,7 +570,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
   if (Offset % 32 != 0)
     return false;
 
-  unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+  unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
   if (SubReg == AMDGPU::NoSubRegister)
     return false;
 
index 7cffdf1a4dcf9e5cce223b53d7bd539c9a6b34a5..9806e6b0714f67d4231374e40714a3be16a32937 100644 (file)
@@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
 // they are not supported at this time.
 //===----------------------------------------------------------------------===//
 
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
-  static const unsigned SubRegs[] = {
-    AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
-    AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
-    AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
-    AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
-    AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
-    AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
-    AMDGPU::sub30, AMDGPU::sub31
-  };
-
-  assert(Channel < array_lengthof(SubRegs));
-  return SubRegs[Channel];
+// Table of NumRegs sized pieces at every 32-bit offset.
+static const uint16_t SubRegFromChannelTable[][32] = {
+  { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+    AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+    AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+    AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+    AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+    AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+    AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+    AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
+  },
+  {
+    AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
+    AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
+    AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
+    AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
+    AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
+    AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
+    AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
+    AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
+  },
+  {
+    AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
+    AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
+    AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
+    AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
+    AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
+    AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
+    AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
+    AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+  },
+  {
+    AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
+    AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
+    AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
+    AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
+    AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
+    AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
+    AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
+    AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+  }
+};
+
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) {
+  const unsigned NumRegIndex = NumRegs - 1;
+
+  assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
+         "Not implemented");
+  assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
+  return SubRegFromChannelTable[NumRegIndex][Channel];
 }
 
 void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
index 3453a8c1b0b396fc063dee50bcd3c3b23b57c4eb..9e713ca804a112879ab94755f769ad7eed77e97d 100644 (file)
@@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
 
   /// \returns the sub reg enum value for the given \p Channel
   /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
-  static unsigned getSubRegFromChannel(unsigned Channel);
+  static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
 
   void reserveRegisterTuples(BitVector &, unsigned Reg) const;
 };
index 3cd1b463b579077a159f6f80a83f10b3714245b4..c120c961741282779d3bff80085186f93b470219 100644 (file)
@@ -303,41 +303,46 @@ body: |
 
 ---
 
-name:            insert_s_s256_s_s64_96
+name:            insert_s_v256_v_s64_96
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9
+    ; CHECK-LABEL: name: insert_s_v256_v_s64_96
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s64) = COPY $vgpr8_vgpr9
+    %2:vgpr(s256) = G_INSERT %0, %1, 96
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_s_s256_s_s64_128
 legalized:       true
 regBankSelected: true
 
 body: |
   bb.0:
     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
-    ; CHECK-LABEL: name: insert_s_s256_s_s64_96
+    ; CHECK-LABEL: name: insert_s_s256_s_s64_128
     ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
     ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5
     ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-    %1:sgpr(s64) = COPY $sgpr8_sgpr9
-    %2:sgpr(s256) = G_INSERT %0, %1, 96
+    %1:sgpr(s64) = COPY $sgpr4_sgpr5
+    %2:sgpr(s256) = G_INSERT %0, %1, 128
     S_ENDPGM 0, implicit %2
 ...
 
 # ---
 
-# name:            insert_s_s256_s_s64_128
-# legalized:       true
-# regBankSelected: true
-
-# body: |
-#   bb.0:
-#     liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
-#     %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-#     %1:sgpr(s64) = COPY $sgpr4_sgpr5
-#     %2:sgpr(s256) = G_INSERT %0, %1, 128
-#     S_ENDPGM 0, implicit %2
-# ...
-
-# ---
-
 # name:            insert_s_s256_s_s64_160
 # legalized:       true
 # regBankSelected: true
@@ -450,3 +455,108 @@ body: |
     %2:sgpr(s160) = G_INSERT %0, %1, 64
     S_ENDPGM 0, implicit %2
 ...
+
+---
+
+name:            insert_s_s256_s_s128_0
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11
+
+    ; CHECK-LABEL: name: insert_s_s256_s_s128_0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+    %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+    %2:sgpr(s256) = G_INSERT %0, %1, 0
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_32
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_32
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 32
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_64
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_64
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 64
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_96
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_96
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 96
+    S_ENDPGM 0, implicit %2
+...
+
+---
+
+name:            insert_v_s256_v_s128_128
+legalized:       true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins:  $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+    ; CHECK-LABEL: name: insert_v_s256_v_s128_128
+    ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7
+    ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+    %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+    %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+    %2:vgpr(s256) = G_INSERT %0, %1, 128
+    S_ENDPGM 0, implicit %2
+...