From: Matt Arsenault Date: Wed, 20 Mar 2019 20:41:34 +0000 (+0000) Subject: AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f2fd57849926bd654366a9fc5efe5dcdb81a6dfd;p=llvm AMDGPU: Don't look for constant in insert/extract_vector_elt regbankselect The constantness shouldn't change the register bank choice. We also don't need to restrict this to only indexing VGPRs, since it's possible to index SGPRs (but SelectionDAG made using this difficult). Allow directly indexing SGPRs when appropriate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@356611 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 0d90481660f..42742a649b3 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -52,24 +52,6 @@ AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI) } -static bool isConstant(const MachineOperand &MO, int64_t &C) { - const MachineFunction *MF = MO.getParent()->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF->getRegInfo(); - const MachineInstr *Def = MRI.getVRegDef(MO.getReg()); - if (!Def) - return false; - - if (Def->getOpcode() == AMDGPU::G_CONSTANT) { - C = Def->getOperand(1).getCImm()->getSExtValue(); - return true; - } - - if (Def->getOpcode() == AMDGPU::COPY) - return isConstant(Def->getOperand(1), C); - - return false; -} - unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst, const RegisterBank &Src, unsigned Size) const { @@ -816,42 +798,35 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case AMDGPU::G_EXTRACT_VECTOR_ELT: { - unsigned IdxOp = 2; - int64_t Imm; - // XXX - Do we really need to fully handle these? The constant case should - // be legalized away before RegBankSelect? - - unsigned OutputBankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ? + unsigned OutputBankID = isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; - + unsigned SrcSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + unsigned IdxSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); unsigned IdxBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); - OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits()); - OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, MRI.getType(MI.getOperand(1).getReg()).getSizeInBits()); + + OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, SrcSize); + OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, SrcSize); // The index can be either if the source vector is VGPR. - OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits()); + OpdsMapping[2] = AMDGPU::getValueMapping(IdxBank, IdxSize); break; } case AMDGPU::G_INSERT_VECTOR_ELT: { - // XXX - Do we really need to fully handle these? The constant case should - // be legalized away before RegBankSelect? - - int64_t Imm; - - unsigned IdxOp = MI.getOpcode() == AMDGPU::G_EXTRACT_VECTOR_ELT ? 2 : 3; - unsigned BankID = isSALUMapping(MI) && isConstant(MI.getOperand(IdxOp), Imm) ? - AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; - - + unsigned OutputBankID = isSALUMapping(MI) ? + AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; - // TODO: Can do SGPR indexing, which would obviate the need for the - // isConstant check. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - unsigned Size = getSizeInBits(MI.getOperand(i).getReg(), MRI, *TRI); - OpdsMapping[i] = AMDGPU::getValueMapping(BankID, Size); - } + unsigned VecSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + unsigned InsertSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + unsigned IdxSize = MRI.getType(MI.getOperand(3).getReg()).getSizeInBits(); + unsigned InsertEltBank = getRegBankID(MI.getOperand(2).getReg(), MRI, *TRI); + unsigned IdxBank = getRegBankID(MI.getOperand(3).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(OutputBankID, VecSize); + OpdsMapping[1] = AMDGPU::getValueMapping(OutputBankID, VecSize); + OpdsMapping[2] = AMDGPU::getValueMapping(InsertEltBank, InsertSize); + // The index can be either if the source vector is VGPR. + OpdsMapping[3] = AMDGPU::getValueMapping(IdxBank, IdxSize); break; } case AMDGPU::G_UNMERGE_VALUES: { diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir index 558129ee7f8..b7dadd8367f 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract-vector-elt.mir @@ -1,39 +1,76 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: extract_vector_elt_0_v2i32_s +name: extract_vector_elt_v16i32_ss legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_vector_elt_0_v2i32_s - ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s32>) = COPY $sgpr0_sgpr1 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16 + ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) ; CHECK: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<2 x s32>) = COPY $sgpr0_sgpr1 - %1:_(s32) = G_CONSTANT i32 0 + %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(s32) = COPY $sgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 $vgpr0 = COPY %2 ... +--- +name: extract_vector_elt_v16i32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>) + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[EVEC]](s32) + %0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: extract_vector_elt_v16i32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) + ; CHECK: $vgpr0 = COPY [[EVEC]](s32) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 + $vgpr0 = COPY %2 +... --- -name: extract_vector_elt_0_v4i32_s +name: extract_vector_elt_v16i32_vv legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-LABEL: name: extract_vector_elt_0_v4i32_s - ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s32) + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16 + ; CHECK-LABEL: name: extract_vector_elt_v16i32_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16 + ; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32) ; CHECK: $vgpr0 = COPY [[EVEC]](s32) - %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = G_CONSTANT i32 0 + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s32) = COPY $vgpr16 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 $vgpr0 = COPY %2 ... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir index ea02e5fab21..dd77ff1afed 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-insert-vector-elt.mir @@ -1,111 +1,111 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: insert_vector_elt_v4i32_s_s_k +name: insert_vector_elt_v4i32_s_s_s legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_k + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $sgpr5 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK: [[IVEC:%[0-9]+]]:sgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $sgpr4 + %2:_(s32) = COPY $sgpr5 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... --- -name: insert_vector_elt_v4i32_v_s_k +name: insert_vector_elt_v4i32_v_s_s legalized: true body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_k + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_v_s_s ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... --- -name: insert_vector_elt_v4i32_s_v_k +name: insert_vector_elt_v4i32_s_v_s legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr5 - ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_k + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $sgpr4 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY2]], [[COPY1]](s32), [[COPY3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $vgpr2 - %2:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr4 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... --- -name: insert_vector_elt_var_v4i32_s_s_s +name: insert_vector_elt_v4i32_s_s_v legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $sgpr6 - ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_s + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_s_v ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY5]](s32) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = COPY $sgpr6 + %1:_(s32) = COPY $sgpr4 + %2:_(s32) = COPY $vgpr0 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... --- -name: insert_vector_elt_var_v4i32_s_s_v +name: insert_vector_elt_v4i32_s_v_v legalized: true body: | bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, $vgpr6 - ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_s_s_v + liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: insert_vector_elt_v4i32_s_v_v ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK: [[COPY3:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[COPY]](<4 x s32>) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY4]](s32), [[COPY2]](s32) + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY3]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = COPY $vgpr6 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... @@ -116,17 +116,38 @@ legalized: true body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr5, $vgpr6 + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr4, $vgpr0 + ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_s_v ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY3]](s32), [[COPY2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s32) = COPY $sgpr4 + %2:_(s32) = COPY $vgpr0 + %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 + $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 +... + +--- +name: insert_vector_elt_var_v4i32_v_v_s +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $sgpr0 + + ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_s + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $sgpr5 - %2:_(s32) = COPY $vgpr6 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $sgpr0 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %3 ... @@ -137,16 +158,17 @@ legalized: true body: | bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr5, $vgpr6 + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4, $vgpr5 + ; CHECK-LABEL: name: insert_vector_elt_var_v4i32_v_v_v ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr6 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 ; CHECK: [[IVEC:%[0-9]+]]:vgpr(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[COPY2]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[IVEC]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s32) = COPY $vgpr5 - %2:_(s32) = COPY $vgpr6 + %1:_(s32) = COPY $vgpr4 + %2:_(s32) = COPY $vgpr5 %3:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %2 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %3 ...