From: Matt Arsenault Date: Mon, 15 Jul 2019 19:43:04 +0000 (+0000) Subject: AMDGPU/GlobalISel: Custom legalize G_INSERT_VECTOR_ELT X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e86254e20edd06ef03ee3bf9b374a217350e7a21;p=llvm AMDGPU/GlobalISel: Custom legalize G_INSERT_VECTOR_ELT git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366116 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9bee38f1424..3d1f7f404c9 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -793,7 +793,7 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI, case TargetOpcode::G_EXTRACT_VECTOR_ELT: return legalizeExtractVectorElt(MI, MRI, MIRBuilder); case TargetOpcode::G_INSERT_VECTOR_ELT: - return true; // TODO + return legalizeInsertVectorElt(MI, MRI, MIRBuilder); default: return false; } @@ -1154,6 +1154,36 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt( return true; } +bool AMDGPULegalizerInfo::legalizeInsertVectorElt( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + // TODO: Should move some of this into LegalizerHelper. + + // TODO: Promote dynamic indexing of s16 to s32 + // TODO: Dynamic s64 indexing is only legal for SGPR. + Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI); + if (!IdxVal) // Dynamic case will be selected to register indexing. 
+ return true; + + Register Dst = MI.getOperand(0).getReg(); + Register Vec = MI.getOperand(1).getReg(); + Register Ins = MI.getOperand(2).getReg(); + + LLT VecTy = MRI.getType(Vec); + LLT EltTy = VecTy.getElementType(); + assert(EltTy == MRI.getType(Ins)); + + B.setInstr(MI); + + if (IdxVal.getValue() < VecTy.getNumElements()) + B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits()); + else + B.buildUndef(Dst); + + MI.eraseFromParent(); + return true; +} + // Return the use branch instruction, otherwise null if the usage is invalid. static MachineInstr *verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI) { diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h index b858342494d..3f1cc1d265d 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -53,6 +53,8 @@ public: MachineIRBuilder &MIRBuilder) const; bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; + bool legalizeInsertVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder) const; Register getLiveInRegister(MachineRegisterInfo &MRI, Register Reg, LLT Ty) const; diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index 2ca336febe9..75759b3e754 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -10,9 +10,8 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], 
[[COPY1]](s32), 0 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = G_CONSTANT i32 0 @@ -20,6 +19,42 @@ body: | $vgpr0_vgpr1 = COPY %3 ... +--- +name: insert_vector_elt_1_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: insert_vector_elt_1_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 + ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: insert_vector_elt_2_v2s32 + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-LABEL: name: insert_vector_elt_2_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(s32) = COPY $vgpr2 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + $vgpr0_vgpr1 = COPY %3 +... + --- name: insert_vector_elt_v2s32_varidx_i64