granicus.if.org Git - llvm/commitdiff
AMDGPU/GlobalISel: Clamp max implicit_def elements
author: Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 25 Feb 2019 20:46:06 +0000 (20:46 +0000)
committer: Matt Arsenault <Matthew.Arsenault@amd.com>
Mon, 25 Feb 2019 20:46:06 +0000 (20:46 +0000)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354818 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir

index ce09d4ac904c40106c93173c516298b640642e0a..d073e4372158eefaa87c93423c8b3e6d67a91db9 100644 (file)
@@ -204,7 +204,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
     .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
     .clampScalarOrElt(0, S32, S512)
     .legalIf(isMultiple32(0))
-    .widenScalarToNextPow2(0, 32);
+    .widenScalarToNextPow2(0, 32)
+    .clampMaxNumElements(0, S32, 16);
 
 
   // FIXME: i1 operands to intrinsics should always be legal, but other i1
index e6d94ee05bab3a8b8a0206c6e14d0ef90e6fc0e9..98b4a111910d4104821f8249af18aa75e9a1ca24 100644 (file)
@@ -212,6 +212,92 @@ body: |
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0
 ...
 
+---
+name: test_implicit_def_v5s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v5s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: S_NOP 0, implicit [[DEF]](<5 x s32>)
+    %0:_(<5 x s32>) = G_IMPLICIT_DEF
+    S_NOP 0, implicit %0
+...
+
+---
+name: test_implicit_def_v6s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v6s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: S_NOP 0, implicit [[DEF]](<6 x s32>)
+    %0:_(<6 x s32>) = G_IMPLICIT_DEF
+    S_NOP 0, implicit %0
+...
+
+---
+name: test_implicit_def_v7s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v7s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: S_NOP 0, implicit [[DEF]](<7 x s32>)
+    %0:_(<7 x s32>) = G_IMPLICIT_DEF
+    S_NOP 0, implicit %0
+...
+
+---
+name: test_implicit_def_v8s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v8s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>)
+    %0:_(<8 x s32>) = G_IMPLICIT_DEF
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0
+...
+
+---
+name: test_implicit_def_v16s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v16s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>)
+    %0:_(<16 x s32>) = G_IMPLICIT_DEF
+    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0
+...
+
+---
+name: test_implicit_def_v17s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v17s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: S_NOP 0, implicit [[DEF]](<17 x s32>)
+    %0:_(<17 x s32>) = G_IMPLICIT_DEF
+    S_NOP 0, implicit %0
+...
+
+---
+name: test_implicit_def_v32s32
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: test_implicit_def_v32s32
+    ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF1]](<16 x s32>)
+    ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<32 x s32>)
+    %0:_(<32 x s32>) = G_IMPLICIT_DEF
+    S_NOP 0, implicit %0
+...
+
 ---
 name: test_implicit_def_v2s1
 body: |