From: Simon Pilgrim
Date: Thu, 12 Jul 2018 13:03:58 +0000 (+0000)
Subject: [X86][AVX] Use Zeroable mask to improve shuffle mask widening
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d2bdce25cef6305c8042cec41d6b78e24bc26c6;p=llvm

[X86][AVX] Use Zeroable mask to improve shuffle mask widening

Noticed while updating D42044, lowerV2X128VectorShuffle can improve the shuffle
mask with the zeroable data to create a target shuffle mask to recognise more
'zero upper 128' patterns.

NOTE: lowerV4X128VectorShuffle could benefit as well but the code needs
refactoring first to discriminate between SM_SentinelUndef and SM_SentinelZero
for negative shuffle indices.

Differential Revision: https://reviews.llvm.org/D49092

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336900 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index e21e2bb6894..2ffd917337c 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4973,6 +4973,19 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
   return true;
 }
 
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+                                    const APInt &Zeroable,
+                                    SmallVectorImpl<int> &WidenedMask) {
+  SmallVector<int, 64> TargetMask(Mask.begin(), Mask.end());
+  for (int i = 0, Size = TargetMask.size(); i < Size; ++i) {
+    if (TargetMask[i] == SM_SentinelUndef)
+      continue;
+    if (Zeroable[i])
+      TargetMask[i] = SM_SentinelZero;
+  }
+  return canWidenShuffleElements(TargetMask, WidenedMask);
+}
+
 static bool canWidenShuffleElements(ArrayRef<int> Mask) {
   SmallVector<int, 32> WidenedMask;
   return canWidenShuffleElements(Mask, WidenedMask);
@@ -13144,7 +13157,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
     return SDValue();
 
   SmallVector<int, 4> WidenedMask;
-  if (!canWidenShuffleElements(Mask, WidenedMask))
+  if (!canWidenShuffleElements(Mask, Zeroable, WidenedMask))
     return SDValue();
 
   bool IsLowZero = (Zeroable & 0x3) == 0x3;
@@ -13213,7 +13226,8 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
   //    [6]   - ignore
   //    [7]   - zero high half of destination
 
-  assert(WidenedMask[0] >= 0 && WidenedMask[1] >= 0 && "Undef half?");
+  assert((WidenedMask[0] >= 0 || IsLowZero) &&
+         (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");
 
   unsigned PermMask = 0;
   PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
@@ -14326,6 +14340,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
   // function lowerV2X128VectorShuffle() is better solution.
   assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");
 
+  // TODO - use Zeroable like we do for lowerV2X128VectorShuffle?
   SmallVector<int, 4> WidenedMask;
   if (!canWidenShuffleElements(Mask, WidenedMask))
     return SDValue();
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index 868e61ce5ef..4240f798bf9 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -20,9 +20,7 @@ define <8 x float> @castA(<4 x float> %m) nounwind uwtable readnone ssp {
 define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
 ; AVX-LABEL: castB:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT:    vmovaps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle.i = shufflevector <2 x double> %m, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x double> %shuffle.i
@@ -33,9 +31,7 @@ define <4 x double> @castB(<2 x double> %m) nounwind uwtable readnone ssp {
 define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
 ; AVX-LABEL: castC:
 ; AVX:       ## %bb.0:
-; AVX-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX-NEXT:    vmovaps %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle.i = shufflevector <2 x i64> %m, <2 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i64> %shuffle.i
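
For illustration only, a self-contained sketch of the widening idea follows. It is not the patch itself (which operates on LLVM's ArrayRef/APInt/SmallVector types and handles a few more undef pairings); the sentinel values and helper names merely mirror X86ISelLowering.cpp, while the std::vector signatures, sample masks and main() driver are assumptions made up for this example. The point is the preprocessing step: known-zeroable mask elements are rewritten to a zero sentinel before the pairwise widening, so element pairs that read an all-zero operand (or mix a zeroable element with undef) collapse into a single "zero this wide lane" entry.

// Standalone sketch (not LLVM code). SentinelUndef/SentinelZero and the
// helper names mirror X86ISelLowering.cpp; everything else is assumed.
#include <cstdio>
#include <vector>

constexpr int SentinelUndef = -1; // element value does not matter
constexpr int SentinelZero = -2;  // element must be zero

// Widen Mask to Mask.size()/2 double-width lanes. Each adjacent pair must be
// all-undef, zero/undef, or the two halves (2*i, 2*i+1) of one wide lane.
// (The real helper also pairs an undef with one aligned element; omitted.)
static bool canWidenShuffleElements(const std::vector<int> &Mask,
                                    std::vector<int> &WidenedMask) {
  WidenedMask.assign(Mask.size() / 2, SentinelUndef);
  for (size_t i = 0; i + 1 < Mask.size(); i += 2) {
    int M0 = Mask[i], M1 = Mask[i + 1];
    bool Z0 = M0 == SentinelZero || M0 == SentinelUndef;
    bool Z1 = M1 == SentinelZero || M1 == SentinelUndef;
    if (M0 == SentinelUndef && M1 == SentinelUndef)
      continue;                            // undef pair -> wide undef
    if ((M0 == SentinelZero || M1 == SentinelZero) && Z0 && Z1) {
      WidenedMask[i / 2] = SentinelZero;   // zero/undef pair -> wide zero
      continue;
    }
    if (M0 >= 0 && (M0 % 2) == 0 && M1 == M0 + 1) {
      WidenedMask[i / 2] = M0 / 2;         // aligned pair -> wide element
      continue;
    }
    return false;                          // cannot widen safely
  }
  return true;
}

// Sketch of the new overload's idea: rewrite zeroable (but not undef)
// elements to the zero sentinel first, then reuse the plain widening above.
static bool canWidenShuffleElements(const std::vector<int> &Mask,
                                    const std::vector<bool> &Zeroable,
                                    std::vector<int> &WidenedMask) {
  std::vector<int> TargetMask(Mask);
  for (size_t i = 0; i < TargetMask.size(); ++i)
    if (TargetMask[i] != SentinelUndef && Zeroable[i])
      TargetMask[i] = SentinelZero;
  return canWidenShuffleElements(TargetMask, WidenedMask);
}

int main() {
  std::vector<int> Widened;
  std::vector<bool> Zeroable = {false, false, true, true};

  // A v4i64-style mask [0,1,6,7] whose top two elements read an all-zero
  // operand. Plain widening gives [0,3] and keeps the zero operand live;
  // with Zeroable it becomes [0,SentinelZero]: low 128 bits of V1, zero the
  // upper 128 bits - the pattern the patch wants to recognise.
  std::vector<int> Mask = {0, 1, 6, 7};
  if (canWidenShuffleElements(Mask, Zeroable, Widened))
    std::printf("widened: [%d, %d]\n", Widened[0], Widened[1]); // [0, -2]

  // A misaligned zeroable element: [0,1,7,undef] cannot be widened at all
  // without the Zeroable rewrite, but still widens to [0,SentinelZero].
  Mask = {0, 1, 7, SentinelUndef};
  std::printf("plain: %d  zeroable: %d\n",
              canWidenShuffleElements(Mask, Widened),
              canWidenShuffleElements(Mask, Zeroable, Widened));
  return 0;
}

In lowerV2X128VectorShuffle a zero half can then be encoded directly, e.g. via the 0x08/0x80 "zero half of destination" bits of the permute immediate shown in the second hunk, and recognising a zero upper half is what turns the castB/castC shuffles above into a single vmovaps of the low 128 bits instead of a vxorps+vblendps.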