From: Craig Topper Date: Sat, 4 Feb 2017 23:26:46 +0000 (+0000) Subject: [X86] Add support for folding (insert_subvector vec1, (extract_subvector vec2, idx1... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=1cfb6c6e52e0f9e3ebfba92731f87e5f7304afe1;p=llvm [X86] Add support for folding (insert_subvector vec1, (extract_subvector vec2, idx1), idx1) -> (blendi vec2, vec1). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@294112 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f51920f5e6e..2090d32d08c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34121,8 +34121,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, // We are still creating an INSERT_SUBVECTOR below with an undef node to // extend the subvector to the size of the result vector. Make sure that // we are not recursing on that node by checking for undef here. - if (IdxVal == 0 && OpVT.is256BitVector() && SubVecVT.is128BitVector() && - !Vec.isUndef()) { + if (IdxVal == 0 && OpVT.is256BitVector() && !Vec.isUndef()) { SDValue Vec256 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT), SubVec, N->getOperand(2)); @@ -34144,6 +34143,30 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(OpVT, Vec256); } + // If we're inserting into the upper half of a 256-bit vector with a vector + // that was extracted from the upper half of a 256-bit vector, we should + // use a blend instead. + if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && OpVT.is256BitVector() && + SubVec.getOperand(0).getSimpleValueType() == OpVT && + Idx == SubVec.getOperand(1) && IdxVal == OpVT.getVectorNumElements()/2) { + + // Integers must be cast to 32-bit because there is only vpblendd; + // vpblendw can't be used for this because it has a handicapped mask. + // If we don't have AVX2, then cast to float. Using a wrong domain blend + // is still more efficient than using the wrong domain vinsertf128 that + // will be created by InsertSubVector(). + MVT CastVT = OpVT; + if (OpVT.isInteger()) + CastVT = Subtarget.hasAVX2() ? MVT::v8i32 : MVT::v8f32; + + // The blend instruction, and therefore its mask, depend on the data type. + unsigned MaskVal = CastVT.getScalarSizeInBits() == 64 ? 0x0c : 0xf0; + SDValue Mask = DAG.getConstant(MaskVal, dl, MVT::i8); + Vec = DAG.getNode(X86ISD::BLENDI, dl, CastVT, DAG.getBitcast(CastVT, Vec), + DAG.getBitcast(CastVT, SubVec.getOperand(0)), Mask); + return DAG.getBitcast(OpVT, Vec); + } + // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte // load: // (insert_subvector (insert_subvector undef, (load16 addr), 0), diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll index f7f54a01d7f..280b218b8b5 100644 --- a/test/CodeGen/X86/avx-vperm2x128.ll +++ b/test/CodeGen/X86/avx-vperm2x128.ll @@ -466,8 +466,7 @@ define <4 x i64> @shuffle_v4i64_67zz(<4 x i64> %a, <4 x i64> %b) { ; AVX1: ## BB#0: ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],zero,zero ; AVX1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v4i64_67zz: diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll index 682a34d3cdb..eb036353eaf 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -2653,8 +2653,7 @@ define <16 x i16> @shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_1 ; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,2,3,4,5,6,7] ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4,5,6,7] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v16i16_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15: diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll index 8b0e25ce43f..31563e82d37 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1089,8 +1089,7 @@ define <32 x i8> @shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_ ; AVX1-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: ; AVX1: # BB#0: ; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[1],zero,xmm0[2],zero,xmm0[4,u,6,7,8,9,10,11,12,13,14,15] -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: shuffle_v32i8_01_zz_02_zz_04_uu_06_07_08_09_10_11_12_13_14_15_u6_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31: