From: Craig Topper
Date: Sun, 8 Oct 2017 01:33:40 +0000 (+0000)
Subject: [X86] Remove ISD::INSERT_SUBVECTOR handling from combineBitcastForMaskedOp. Add isel...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ecabb5f6bd1f119484ba2afc6ac3511ad43a2ca6;p=llvm

[X86] Remove ISD::INSERT_SUBVECTOR handling from combineBitcastForMaskedOp. Add isel patterns to make up for it.

This will allow for some flexibility in canonicalizing bitcasts around insert_subvector.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315160 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bf0cd895676..7305af6b39d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -30465,29 +30465,6 @@ static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
     return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
                                     Op.getOperand(2));
   }
-  case ISD::INSERT_SUBVECTOR: {
-    unsigned EltSize = EltVT.getSizeInBits();
-    if (EltSize != 32 && EltSize != 64)
-      return false;
-    MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
-    // Only change element size, not type.
-    if (EltVT.isInteger() != OpEltVT.isInteger())
-      return false;
-    uint64_t Imm = Op.getConstantOperandVal(2);
-    Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
-    SDValue Op0 = DAG.getBitcast(VT, Op.getOperand(0));
-    DCI.AddToWorklist(Op0.getNode());
-    // Op1 needs to be bitcasted to a smaller vector with the same element type.
-    SDValue Op1 = Op.getOperand(1);
-    MVT Op1VT = MVT::getVectorVT(EltVT,
-                                 Op1.getSimpleValueType().getSizeInBits() / EltSize);
-    Op1 = DAG.getBitcast(Op1VT, Op1);
-    DCI.AddToWorklist(Op1.getNode());
-    DCI.CombineTo(OrigOp.getNode(),
-                  DAG.getNode(Opcode, DL, VT, Op0, Op1,
-                              DAG.getIntPtrConstant(Imm, DL)));
-    return true;
-  }
   case X86ISD::SUBV_BROADCAST: {
     unsigned EltSize = EltVT.getSizeInBits();
     if (EltSize != 32 && EltSize != 64)
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index fe2fe537871..929faaf23e3 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -615,6 +615,139 @@ defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
 defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
               vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
 
+
+multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
+                                 X86VectorVTInfo To, X86VectorVTInfo Cast,
+                                 PatFrag vinsert_insert,
+                                 SDNodeXForm INSERT_get_vinsert_imm,
+                                 list<Predicate> p> {
+let Predicates = p in {
+  def : Pat<(Cast.VT
+             (vselect Cast.KRCWM:$mask,
+                      (bitconvert
+                       (vinsert_insert:$ins (To.VT To.RC:$src1),
+                                            (From.VT From.RC:$src2),
+                                            (iPTR imm))),
+                      Cast.RC:$src0)),
+            (!cast<Instruction>(InstrStr#"rrk")
+             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
+             (INSERT_get_vinsert_imm To.RC:$ins))>;
+  def : Pat<(Cast.VT
+             (vselect Cast.KRCWM:$mask,
+                      (bitconvert
+                       (vinsert_insert:$ins (To.VT To.RC:$src1),
+                                            (From.VT
+                                             (bitconvert
+                                              (From.LdFrag addr:$src2))),
+                                            (iPTR imm))),
+                      Cast.RC:$src0)),
+            (!cast<Instruction>(InstrStr#"rmk")
+             Cast.RC:$src0, Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
+             (INSERT_get_vinsert_imm To.RC:$ins))>;
+
+  def : Pat<(Cast.VT
+             (vselect Cast.KRCWM:$mask,
+                      (bitconvert
+                       (vinsert_insert:$ins (To.VT To.RC:$src1),
+                                            (From.VT From.RC:$src2),
+                                            (iPTR imm))),
+                      Cast.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#"rrkz")
+             Cast.KRCWM:$mask, To.RC:$src1, From.RC:$src2,
+             (INSERT_get_vinsert_imm To.RC:$ins))>;
+  def : Pat<(Cast.VT
+             (vselect Cast.KRCWM:$mask,
+                      (bitconvert
+                       (vinsert_insert:$ins (To.VT To.RC:$src1),
+                                            (From.VT
+                                             (bitconvert
+                                              (From.LdFrag addr:$src2))),
+                                            (iPTR imm))),
+                      Cast.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#"rmkz")
+             Cast.KRCWM:$mask, To.RC:$src1, addr:$src2,
+             (INSERT_get_vinsert_imm To.RC:$ins))>;
+}
+}
+
+defm : vinsert_for_mask_cast<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
+                             v8f32x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4f32x_info, v8f32x_info,
+                             v4f64x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
+
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
+                             v8i32x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
+                             v8i32x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
+                             v8i32x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v4i32x_info, v8i32x_info,
+                             v4i64x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v8i16x_info, v16i16x_info,
+                             v4i64x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z256", v16i8x_info, v32i8x_info,
+                             v4i64x_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI, HasVLX]>;
+
+defm : vinsert_for_mask_cast<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
+                             v16f32_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x2Z", v4f32x_info, v16f32_info,
+                             v8f64_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI]>;
+
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
+                             v16i32_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
+                             v16i32_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
+                             v16i32_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v4i32x_info, v16i32_info,
+                             v8i64_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v8i16x_info, v32i16_info,
+                             v8i64_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x2Z", v16i8x_info, v64i8_info,
+                             v8i64_info, vinsert128_insert,
+                             INSERT_get_vinsert128_imm, [HasDQI]>;
+
+defm : vinsert_for_mask_cast<"VINSERTF32x8Z", v4f64x_info, v8f64_info,
+                             v16f32_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
+                             v8f64_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasAVX512]>;
+
+defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v4i64x_info, v8i64_info,
+                             v16i32_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v16i16x_info, v32i16_info,
+                             v16i32_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTI32x8Z", v32i8x_info, v64i8_info,
+                             v16i32_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasDQI]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
+                             v8i64_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
+                             v8i64_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_mask_cast<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
+                             v8i64_info, vinsert256_insert,
+                             INSERT_get_vinsert256_imm, [HasAVX512]>;
+
 // vinsertps - insert f32 to XMM
 let ExeDomain = SSEPackedSingle in {
   def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
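
For readers following the change: the deleted ISD::INSERT_SUBVECTOR case rewrote the masked operation at the mask's element width, which required rescaling the subvector insert position; the new vinsert_for_mask_cast patterns instead match the vselect-of-bitcast-of-insert_subvector shape directly and leave the immediate to INSERT_get_vinsert_imm. The rescaling arithmetic from the removed C++ (Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize) is sketched below as a minimal standalone example; rescaleInsertIndex and its constants are illustrative names chosen for this note, not LLVM API.

// Minimal standalone sketch of the index rescaling performed by the removed
// combineBitcastForMaskedOp case. Illustrative only; not part of LLVM.
#include <cassert>
#include <cstdint>

// Convert an insert_subvector position counted in OpEltBits-wide elements
// into the same bit offset counted in MaskEltBits-wide elements, mirroring
//   Imm = (Imm * OpEltVT.getSizeInBits()) / EltSize;
uint64_t rescaleInsertIndex(uint64_t Imm, unsigned OpEltBits,
                            unsigned MaskEltBits) {
  return (Imm * OpEltBits) / MaskEltBits;
}

int main() {
  // Inserting a v8i16 subvector at element 8 of v16i16 (bit offset 128)
  // corresponds to element 4 when the mask is applied at v8i32 granularity.
  assert(rescaleInsertIndex(8, 16, 32) == 4);
  return 0;
}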