From da91f4c26ac599d43e18d20d0246ba738c465fbd Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 25 Feb 2019 20:46:06 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Clamp max implicit_def elements git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354818 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 3 +- .../GlobalISel/legalize-implicit-def.mir | 86 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index ce09d4ac904..d073e437215 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -204,7 +204,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) .clampScalarOrElt(0, S32, S512) .legalIf(isMultiple32(0)) - .widenScalarToNextPow2(0, 32); + .widenScalarToNextPow2(0, 32) + .clampMaxNumElements(0, S32, 16); // FIXME: i1 operands to intrinsics should always be legal, but other i1 diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index e6d94ee05ba..98b4a111910 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -212,6 +212,92 @@ body: | $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... +--- +name: test_implicit_def_v5s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v5s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit [[DEF]](<5 x s32>) + %0:_(<5 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... + +--- +name: test_implicit_def_v6s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v6s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit [[DEF]](<6 x s32>) + %0:_(<6 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... + +--- +name: test_implicit_def_v7s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v7s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit [[DEF]](<7 x s32>) + %0:_(<7 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... + +--- +name: test_implicit_def_v8s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v8s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) + %0:_(<8 x s32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 +... + +--- +name: test_implicit_def_v16s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v16s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) + %0:_(<16 x s32>) = G_IMPLICIT_DEF + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 +... + +--- +name: test_implicit_def_v17s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v17s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF + ; CHECK: S_NOP 0, implicit [[DEF]](<17 x s32>) + %0:_(<17 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... + +--- +name: test_implicit_def_v32s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_implicit_def_v32s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF1]](<16 x s32>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<32 x s32>) + %0:_(<32 x s32>) = G_IMPLICIT_DEF + S_NOP 0, implicit %0 +... + --- name: test_implicit_def_v2s1 body: | -- 2.50.1