From e9ea05106521cc146d86323d9e8858b2299557bf Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 1 Feb 2019 15:31:01 +0000 Subject: [PATCH] [X86][AVX] Combine INSERT_SUBVECTOR(SRC0, BITCAST(SHUFFLE(EXTRACT_SUBVECTOR(SRC1)))) Enable peeking through one-use bitcasts to the subvector shuffle. This still depends on the subvector having the same scalar size, but D57514 has already helped with the trickier patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352879 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 ++++--- test/CodeGen/X86/vector-shuffle-256-v8.ll | 25 +++++++++++++++-------- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cc00970569e..36d3fe1dd60 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6592,7 +6592,6 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, case ISD::INSERT_SUBVECTOR: { // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(EXTRACT_SUBVECTOR(SRC1)) where // SRC0/SRC1 are both of the same valuetype VT. - // TODO - add peekThroughOneUseBitcasts support. 
SDValue Src = N.getOperand(0); SDValue Sub = N.getOperand(1); EVT SubVT = Sub.getValueType(); @@ -6602,8 +6601,10 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, return false; SmallVector SubMask; SmallVector SubInputs; - if (!resolveTargetShuffleInputs(Sub, SubInputs, SubMask, DAG) || - SubMask.size() != NumSubElts) + if (!resolveTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, + SubMask, DAG)) + return false; + if (SubMask.size() != NumSubElts) return false; Ops.push_back(Src); for (SDValue &SubInput : SubInputs) { diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index 63c65d7abda..2eb33ff0af8 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1553,12 +1553,19 @@ define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) { ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7] ; AVX2-NEXT: retq ; -; AVX512VL-LABEL: shuffle_v8i32_08991abb: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] -; AVX512VL-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; AVX512VL-SLOW-LABEL: shuffle_v8i32_08991abb: +; AVX512VL-SLOW: # %bb.0: +; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero +; AVX512VL-SLOW-NEXT: vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3] +; AVX512VL-SLOW-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 +; AVX512VL-SLOW-NEXT: retq +; +; AVX512VL-FAST-LABEL: shuffle_v8i32_08991abb: +; AVX512VL-FAST: # %bb.0: +; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [8,0,1,1,9,2,3,3] +; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 +; AVX512VL-FAST-NEXT: vmovdqa %ymm2, %ymm0 +; AVX512VL-FAST-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } @@ -1605,9 +1612,9 @@ define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) { ; ; 
AVX512VL-FAST-LABEL: shuffle_v8i32_09ab1def: ; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7] -; AVX512VL-FAST-NEXT: vpermi2d %ymm2, %ymm1, %ymm0 +; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [8,1,2,3,9,5,6,7] +; AVX512VL-FAST-NEXT: vpermi2d %ymm0, %ymm1, %ymm2 +; AVX512VL-FAST-NEXT: vmovdqa %ymm2, %ymm0 ; AVX512VL-FAST-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle -- 2.40.0