From 7835305a0d0bb47af6b4ffcabfd5db545ef910b8 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 15 Jul 2019 19:37:34 +0000 Subject: [PATCH] GlobalISel: Implement narrowScalar for vector extract/insert indexes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 11 +++++ .../legalize-extract-vector-elt.mir | 19 ++++++++ .../GlobalISel/legalize-insert-vector-elt.mir | 46 ++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index d13b1a53f61..bf3cca4115a 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -736,6 +736,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_EXTRACT_VECTOR_ELT: + case TargetOpcode::G_INSERT_VECTOR_ELT: { + if (TypeIdx != 2) + return UnableToLegalize; + + int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3; + Observer.changingInstr(MI); + narrowScalarSrc(MI, NarrowTy, OpIdx); + Observer.changedInstr(MI); + return Legalized; + } } } diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index 2fdde786b14..682eafb2ea4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -646,3 +646,22 @@ body: | %2:_(s128) = G_EXTRACT_VECTOR_ELT %0, %1 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 ... + +--- +name: extract_vector_elt_v2i32_varidx_i64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; CHECK-LABEL: name: extract_vector_elt_v2i32_varidx_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[TRUNC]](s32) + ; CHECK: $vgpr0 = COPY [[EVEC]](s32) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + $vgpr0 = COPY %2 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index d55f67262c5..2ca336febe9 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -2,12 +2,12 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --- -name: insert_vector_elt_0_v2i32 +name: insert_vector_elt_0_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK-LABEL: name: insert_vector_elt_0_v2i32 + ; CHECK-LABEL: name: insert_vector_elt_0_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -19,3 +19,45 @@ body: | %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $vgpr0_vgpr1 = COPY %3 ... + +--- +name: insert_vector_elt_v2s32_varidx_i64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3_vgpr4 + + ; CHECK-LABEL: name: insert_vector_elt_v2s32_varidx_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr3_vgpr4 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32) + ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s64) = COPY $vgpr3_vgpr4 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: insert_vector_elt_v16s32_varidx_i64 + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17_vgpr18 + + ; CHECK-LABEL: name: insert_vector_elt_v16s32_varidx_i64 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr17_vgpr18 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) + ; CHECK: [[IVEC:%[0-9]+]]:_(<16 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[TRUNC]](s32) + ; CHECK: S_ENDPGM 0, implicit [[IVEC]](<16 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s32) = COPY $vgpr16 + %2:_(s64) = COPY $vgpr17_vgpr18 + %3:_(<16 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + S_ENDPGM 0, implicit %3 +... -- 2.49.0