From: Simon Pilgrim Date: Sat, 5 Nov 2016 18:31:57 +0000 (+0000) Subject: [X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cc24df9a7e9ab2baa874133cda34d55a28bb2a3a;p=llvm [X86][SSE] Reuse zeroable element mask in lowerVectorShuffleAsBlend. NFCI Don't regenerate a zeroable element mask with computeZeroableShuffleElements when it's already available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286045 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index d81ffb10a3a..dd5d195bebe 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7558,12 +7558,12 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, /// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Original, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); SmallVector Mask(Original.begin(), Original.end()); - SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); bool ForceV1Zero = false, ForceV2Zero = false; // Attempt to generate the binary blend mask. If an input is zero then @@ -9047,6 +9047,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT, /// it is better to avoid lowering through this for integer vectors where /// possible. 
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -9108,7 +9109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Subtarget.hasSSE41()) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. @@ -9201,7 +9202,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. @@ -9396,7 +9397,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Subtarget.hasSSE41()) { if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Use INSERTPS if we can complete the shuffle efficiently. 
@@ -9485,7 +9486,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, @@ -10125,7 +10126,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, @@ -10402,8 +10403,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // important as a single pshufb is significantly faster for that. if (V1InUse && V2InUse) { if (Subtarget.hasSSE41()) - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Blend = lowerVectorShuffleAsBlend( + DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; // We can use an unpack to do the blending rather than an or in some @@ -10536,7 +10537,7 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, case MVT::v2i64: return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v2f64: - return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i32: return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4f32: @@ -10841,6 +10842,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, /// \brief Handle lowering 2-lane 128-bit shuffles. 
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, + const SmallBitVector &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // TODO: If minimizing size and one of the inputs is a zero vector and the @@ -10849,7 +10851,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // Blends are faster and handle all the non-lane-crossing cases. if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode()); @@ -11359,6 +11361,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11369,7 +11372,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, SmallVector WidenedMask; if (canWidenShuffleElements(Mask, WidenedMask)) if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return V; if (V2.isUndef()) { @@ -11413,7 +11416,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Check if the blend happens to exactly fit that of SHUFPD. 
@@ -11464,11 +11467,11 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, SmallVector WidenedMask; if (canWidenShuffleElements(Mask, WidenedMask)) if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. @@ -11530,6 +11533,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, + const SmallBitVector &Zeroable, SDValue V1, SDValue V2, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11538,7 +11542,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. @@ -11632,7 +11636,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, return ZExt; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. @@ -11717,7 +11721,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. 
@@ -11803,7 +11807,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, return Broadcast; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG)) + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. @@ -11900,11 +11904,11 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, switch (VT.SimpleTy) { case MVT::v4f64: - return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i64: return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8f32: - return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG); + return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i32: return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i16: