From 6048d74bb0f4cc96ea93fbc2e8a4b06229572813 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 5 Nov 2016 18:05:13 +0000 Subject: [PATCH] [X86][SSE] Reuse zeroable element mask in SSE4A EXTRQ/INSERTQ vector shuffle lowering. NFCI Don't regenerate a zeroable element mask with computeZeroableShuffleElements when it's already available. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@286043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f115c9c91b8..37914685468 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8025,13 +8025,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, + const SmallBitVector &Zeroable, SelectionDAG &DAG) { - SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); - assert(!Zeroable.all() && "Fully zeroable shuffle mask"); - int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + assert(!Zeroable.all() && "Fully zeroable shuffle mask"); // Upper half must be undefined. if (!isUndefInRange(Mask, HalfSize, HalfSize)) @@ -10112,7 +10111,8 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG)) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, DAG)) return V; // There are special ways we can lower some single-element blends. 
@@ -10265,7 +10265,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG)) + if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, DAG)) return V; int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); -- 2.50.1