From: Sanjay Patel
Date: Thu, 24 Jan 2019 18:52:12 +0000 (+0000)
Subject: [x86] rename VectorShuffle -> Shuffle; NFC
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fc3b217ecc88ff0462ae25b02b65e4d573c5b7a9;p=llvm

[x86] rename VectorShuffle -> Shuffle; NFC

This wasn't consistent within the file, so made it harder to search.
Standardize on the shorter name to save some typing.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@352077 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 5e7dad7c933..49cacc0606b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -9858,12 +9858,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
 }
 
 /// Try to lower a shuffle with a single PSHUFB of V1 or V2.
-static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2,
-                                            const APInt &Zeroable,
-                                            const X86Subtarget &Subtarget,
-                                            SelectionDAG &DAG) {
+static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
+                                      ArrayRef<int> Mask, SDValue V1,
+                                      SDValue V2, const APInt &Zeroable,
+                                      const X86Subtarget &Subtarget,
+                                      SelectionDAG &DAG) {
   int Size = Mask.size();
   int LaneSize = 128 / VT.getScalarSizeInBits();
   const int NumBytes = VT.getSizeInBits() / 8;
@@ -9917,11 +9916,11 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT, const SDLoc &dl);
 
 // X86 has dedicated shuffle that can be lowered to VEXPAND
-static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
-                                          const APInt &Zeroable,
-                                          ArrayRef<int> Mask, SDValue &V1,
-                                          SDValue &V2, SelectionDAG &DAG,
-                                          const X86Subtarget &Subtarget) {
+static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
+                                    const APInt &Zeroable,
+                                    ArrayRef<int> Mask, SDValue &V1,
+                                    SDValue &V2, SelectionDAG &DAG,
+                                    const X86Subtarget &Subtarget) {
   bool IsLeftZeroSide = true;
   if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                 IsLeftZeroSide))
@@ -10027,9 +10026,9 @@ static bool matchVectorShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
 
 // X86 has dedicated unpack instructions that can handle specific blend
 // operations: UNPCKH and UNPCKL.
-static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
-                                           ArrayRef<int> Mask, SDValue V1,
-                                           SDValue V2, SelectionDAG &DAG) {
+static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT,
+                                     ArrayRef<int> Mask, SDValue V1, SDValue V2,
+                                     SelectionDAG &DAG) {
   SmallVector<int, 8> Unpckl;
   createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
   if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
@@ -10091,10 +10090,10 @@ static bool matchVectorShuffleAsVPMOV(ArrayRef<int> Mask, bool SwappedOps,
 //
 // But when avx512vl is available, one can just use a single vpmovdw
 // instruction.
-static SDValue lowerVectorShuffleWithVPMOV(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + SelectionDAG &DAG, + const X86Subtarget &Subtarget) { if (VT != MVT::v16i8 && VT != MVT::v8i16) return SDValue(); @@ -10199,10 +10198,9 @@ static bool matchVectorShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, return false; } -static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef Mask, + SDValue V1, SDValue V2, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT PackVT; unsigned PackOpcode; if (matchVectorShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG, @@ -10217,10 +10215,9 @@ static SDValue lowerVectorShuffleWithPACK(const SDLoc &DL, MVT VT, /// /// This handles cases where we can model a blend exactly as a bitmask due to /// one of the inputs being zeroable. -static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && "Floating point types are not supported"); MVT EltVT = VT.getVectorElementType(); SDValue Zero = DAG.getConstant(0, DL, EltVT); @@ -10251,9 +10248,9 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, /// This is used as a fallback approach when first class blend instructions are /// unavailable. Currently it is only suitable for integer vectors, but could /// be generalized for floating point vectors if desirable. -static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); MVT EltVT = VT.getVectorElementType(); SDValue Zero = DAG.getConstant(0, DL, EltVT); @@ -10335,11 +10332,11 @@ static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, /// these values. It relies on the availability of the X86ISD::BLENDI pattern to /// be matched in the backend with the type given. What it does check for is /// that the shuffle mask is a blend, or convertible into a blend with zero. -static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Original, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Original, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { SmallVector Mask = createTargetShuffleMask(Original, Zeroable); uint64_t BlendMask = 0; @@ -10427,8 +10424,8 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, "256-bit byte-blends require AVX2 support!"); // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. 
- if (SDValue Masked = - lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) + if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, + DAG)) return Masked; if (Subtarget.hasBWI() && Subtarget.hasVLX()) { @@ -10501,11 +10498,11 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, /// /// This matches the pattern where we can blend elements from two inputs and /// then reduce the shuffle to a single-input permutation. -static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - SelectionDAG &DAG, - bool ImmBlends = false) { +static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + SelectionDAG &DAG, + bool ImmBlends = false) { // We build up the blend mask while checking whether a blend is a viable way // to reduce the shuffle. SmallVector BlendMask(Mask.size(), -1); @@ -10540,10 +10537,10 @@ static SDValue lowerVectorShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, /// /// This matches the pattern where we can unpack elements from two inputs and /// then reduce the shuffle to a single-input (wider) permutation. -static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, + SDValue V1, SDValue V2, + ArrayRef Mask, + SelectionDAG &DAG) { int NumElts = Mask.size(); int NumLanes = VT.getSizeInBits() / 128; int NumLaneElts = NumElts / NumLanes; @@ -10603,7 +10600,7 @@ static SDValue lowerVectorShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then /// permuting the elements of the result in place. -static SDValue lowerVectorShuffleAsByteRotateAndPermute( +static SDValue lowerShuffleAsByteRotateAndPermute( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || @@ -10694,7 +10691,7 @@ static SDValue lowerVectorShuffleAsByteRotateAndPermute( /// shuffle+blend operations on newer X86 ISAs where we have very fast blend /// operations. It will try to pick the best arrangement of shuffles and /// blends. -static SDValue lowerVectorShuffleAsDecomposedShuffleBlend( +static SDValue lowerShuffleAsDecomposedShuffleBlend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { // Shuffle the input elements into the desired positions in V1 and V2 and @@ -10718,18 +10715,18 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend( // pre-shuffle first is a better strategy. if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) { // Only prefer immediate blends to unpack/rotate. - if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute( - DL, VT, V1, V2, Mask, DAG, true)) + if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, + DAG, true)) return BlendPerm; - if (SDValue UnpackPerm = - lowerVectorShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG)) + if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, + DAG)) return UnpackPerm; - if (SDValue RotatePerm = lowerVectorShuffleAsByteRotateAndPermute( + if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute( DL, VT, V1, V2, Mask, Subtarget, DAG)) return RotatePerm; // Unpack/rotate failed - try again with variable blends. 
- if (SDValue BlendPerm = - lowerVectorShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, DAG)) + if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, + DAG)) return BlendPerm; } @@ -10741,8 +10738,7 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend( /// Try to lower a vector shuffle as a rotation. /// /// This is used for support PALIGNR for SSSE3 or VALIGND/Q for AVX512. -static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2, - ArrayRef Mask) { +static int matchShuffleAsRotate(SDValue &V1, SDValue &V2, ArrayRef Mask) { int NumElts = Mask.size(); // We need to detect various ways of spelling a rotation: @@ -10826,8 +10822,8 @@ static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2, /// elements, and takes the low elements as the result. Note that while this is /// specified as a *right shift* because x86 is little-endian, it is a *left /// rotate* of the vector lanes. -static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, - ArrayRef Mask) { +static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask) { // Don't accept any shuffles with zero elements. if (any_of(Mask, [](int M) { return M == SM_SentinelZero; })) return -1; @@ -10837,7 +10833,7 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) return -1; - int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask); + int Rotation = matchShuffleAsRotate(V1, V2, RepeatedMask); if (Rotation <= 0) return -1; @@ -10848,15 +10844,14 @@ static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, return Rotation * Scale; } -static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); SDValue Lo = V1, Hi = V2; - int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask); + int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask); if (ByteRotation <= 0) return SDValue(); @@ -10904,11 +10899,10 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT, /// elements, and takes the low elements as the result. Note that while this is /// specified as a *right shift* because x86 is little-endian, it is a *left /// rotate* of the vector lanes. 
-static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && "Only 32-bit and 64-bit elements are supported!"); @@ -10917,7 +10911,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT, && "VLX required for 128/256-bit vectors"); SDValue Lo = V1, Hi = V2; - int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask); + int Rotation = matchShuffleAsRotate(Lo, Hi, Mask); if (Rotation <= 0) return SDValue(); @@ -10948,11 +10942,10 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT, /// [ 5, 6, 7, zz, zz, zz, zz, zz] /// [ -1, 5, 6, 7, zz, zz, zz, zz] /// [ 1, 2, -1, -1, -1, -1, zz, zz] -static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, - unsigned ScalarSizeInBits, - ArrayRef Mask, int MaskOffset, - const APInt &Zeroable, - const X86Subtarget &Subtarget) { +static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, + unsigned ScalarSizeInBits, ArrayRef Mask, + int MaskOffset, const APInt &Zeroable, + const X86Subtarget &Subtarget) { int Size = Mask.size(); unsigned SizeInBits = Size * ScalarSizeInBits; @@ -11011,11 +11004,11 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, return -1; } -static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { int Size = Mask.size(); assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -11024,14 +11017,13 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, unsigned Opcode; // Try to match shuffle against V1 shift. - int ShiftAmt = matchVectorShuffleAsShift( - ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget); + int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), + Mask, 0, Zeroable, Subtarget); // If V1 failed, try to match shuffle against V2 shift. if (ShiftAmt < 0) { - ShiftAmt = - matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), - Mask, Size, Zeroable, Subtarget); + ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), + Mask, Size, Zeroable, Subtarget); V = V2; } @@ -11048,9 +11040,9 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, // EXTRQ: Extract Len elements from lower half of source, starting at Idx. // Remainder of lower half result is zero and upper half is all undef. 
-static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, - ArrayRef Mask, uint64_t &BitLen, - uint64_t &BitIdx, const APInt &Zeroable) { +static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask, uint64_t &BitLen, + uint64_t &BitIdx, const APInt &Zeroable) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -11104,9 +11096,9 @@ static bool matchVectorShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, // INSERTQ: Extract lowest Len elements from lower half of second source and // insert over first source, starting at Idx. // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } -static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, - ArrayRef Mask, uint64_t &BitLen, - uint64_t &BitIdx) { +static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, + ArrayRef Mask, uint64_t &BitLen, + uint64_t &BitIdx) { int Size = Mask.size(); int HalfSize = Size / 2; assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); @@ -11170,17 +11162,16 @@ static bool matchVectorShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, } /// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. -static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, SelectionDAG &DAG) { uint64_t BitLen, BitIdx; - if (matchVectorShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) + if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, DAG.getConstant(BitLen, DL, MVT::i8), DAG.getConstant(BitIdx, DL, MVT::i8)); - if (matchVectorShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) + if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), V2 ? V2 : DAG.getUNDEF(VT), DAG.getConstant(BitLen, DL, MVT::i8), @@ -11198,7 +11189,7 @@ static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, /// avoid excess shuffling the offset must either being in the bottom lane /// or at the start of a higher lane. All extended elements must be from /// the same lane. -static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( +static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(Scale > 1 && "Need a scale to extend."); @@ -11355,7 +11346,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( /// /// The reason we have dedicated lowering for zext-style shuffles is that they /// are both incredibly common and often quite performance sensitive. 
-static SDValue lowerVectorShuffleAsZeroOrAnyExtend( +static SDValue lowerShuffleAsZeroOrAnyExtend( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11426,8 +11417,8 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( if (Offset != 0 && Matches < 2) return SDValue(); - return lowerVectorShuffleAsSpecificZeroOrAnyExtend( - DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG); + return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt, + InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. @@ -11511,7 +11502,7 @@ static bool isShuffleFoldableLoad(SDValue V) { /// /// This is a common pattern that we have especially efficient patterns to lower /// across all subtarget feature sets. -static SDValue lowerVectorShuffleAsElementInsertion( +static SDValue lowerShuffleAsElementInsertion( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, const X86Subtarget &Subtarget, SelectionDAG &DAG) { @@ -11609,10 +11600,10 @@ static SDValue lowerVectorShuffleAsElementInsertion( /// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. /// /// This assumes we have AVX2. -static SDValue lowerVectorShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, - SDValue V0, int BroadcastIdx, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0, + int BroadcastIdx, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(Subtarget.hasAVX2() && "We can only lower integer broadcasts with AVX2!"); @@ -11738,11 +11729,10 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. -static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) || (Subtarget.hasAVX() && VT.isFloatingPoint()) || (Subtarget.hasAVX2() && VT.isInteger()))) @@ -11843,7 +11833,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // type than the shuffle, the broadcast element is in essence truncated. // Make that explicit to ease folding. if (V.getOpcode() == ISD::BITCAST && VT.isInteger()) - if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast( + if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast( DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG)) return TruncBroadcast; @@ -11953,11 +11943,10 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT, // are much smaller to encode than a SHUFPS and an INSERTPS. We can also // perform INSERTPS if a single V1 element is out of place and all V2 // elements are zeroable. 
-static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2, - unsigned &InsertPSMask, - const APInt &Zeroable, - ArrayRef Mask, - SelectionDAG &DAG) { +static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, + unsigned &InsertPSMask, + const APInt &Zeroable, + ArrayRef Mask, SelectionDAG &DAG) { assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); @@ -12042,16 +12031,15 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2, return false; } -static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, + ArrayRef Mask, const APInt &Zeroable, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); // Attempt to match the insertps pattern. unsigned InsertPSMask; - if (!matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG)) + if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG)) return SDValue(); // Insert the V2 element into the desired position. @@ -12068,7 +12056,7 @@ static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1, /// because for floating point vectors we have a generalized SHUFPS lowering /// strategy that handles everything that doesn't *exactly* match an unpack, /// making this clever lowering unnecessary. -static SDValue lowerVectorShuffleAsPermuteAndUnpack( +static SDValue lowerShuffleAsPermuteAndUnpack( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!VT.isFloatingPoint() && @@ -12183,19 +12171,18 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack( /// instructions will incur a domain crossing penalty on some chips though so /// it is better to avoid lowering through this for integer vectors where /// possible. -static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); if (V2.isUndef()) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Straight shuffle of a single input vector. Simulate this by using the @@ -12226,14 +12213,14 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // When loading a scalar and then shuffling it into a vector we can often do // the insertion cheaply. 
- if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Try inverting the insertion since for v2 masks it is easy to do and we // can't reliably sort the mask one way or the other. int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2), Mask[1] < 0 ? -1 : (Mask[1] ^ 2)}; - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) return Insertion; @@ -12249,13 +12236,12 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S)); if (Subtarget.hasSSE41()) - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG)) return V; unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); @@ -12269,19 +12255,18 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// the integer unit to minimize domain crossing penalties. However, for blends /// it falls back to the floating point shuffle operation with appropriate bit /// casting. -static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); if (V2.isUndef()) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Straight shuffle of a single input vector. For everything from SSE2 @@ -12306,19 +12291,19 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, return Extract; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // When loading a scalar and then shuffling it into a vector we can often do // the insertion cheaply. - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Try inverting the insertion since for v2 masks it is easy to do and we // can't reliably sort the mask one way or the other. 
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2}; - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) return Insertion; @@ -12326,33 +12311,32 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // *exact* same predicate. bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. if (Subtarget.hasSSSE3()) { if (Subtarget.hasVLX()) - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v2i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v2i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; } // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v2i64, V1, V2, Mask, + Subtarget, DAG); // We implement this with SHUFPD which is pretty lame because it will likely // incur 2 cycles of stall for integer vectors on Nehalem and older chips. @@ -12369,9 +12353,9 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This is a helper routine dedicated to lowering vector shuffles using SHUFPS. /// It makes no assumptions about whether this is the *best* lowering, it simply /// uses it. -static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { +static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT, + ArrayRef Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { SDValue LowV = V1, HighV = V2; int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]}; @@ -12456,11 +12440,10 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT, /// Uses instructions exclusively from the floating point unit to minimize /// domain crossing penalties, as these are sufficient to implement all v4f32 /// shuffles. 
-static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); @@ -12469,8 +12452,8 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (NumV2Elements == 0) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v4f32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Use even/odd duplicate instructions for masks that match their pattern. @@ -12513,23 +12496,22 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // when the V2 input is targeting element 0 of the mask -- that is the fast // case here. if (NumV2Elements == 1 && Mask[0] >= 4) - if (SDValue V = lowerVectorShuffleAsElementInsertion( + if (SDValue V = lowerShuffleAsElementInsertion( DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; if (Subtarget.hasSSE41()) { - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Use INSERTPS if we can complete the shuffle efficiently. - if (SDValue V = - lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG)) + if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG)) return V; if (!isSingleSHUFPSMask(Mask)) - if (SDValue BlendPerm = lowerVectorShuffleAsBlendAndPermute( - DL, MVT::v4f32, V1, V2, Mask, DAG)) + if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, + V2, Mask, DAG)) return BlendPerm; } @@ -12543,23 +12525,21 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, } // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) return V; // Otherwise fall back to a SHUFPS lowering strategy. - return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); + return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); } /// Lower 4-lane i32 vector shuffles. /// /// We try to handle these with integer-domain shuffles where we can, but for /// blends we use the floating point domain blend instructions. 
-static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); @@ -12567,16 +12547,16 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( - DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); if (NumV2Elements == 0) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Straight shuffle of a single input vector. For everything from SSE2 @@ -12600,13 +12580,13 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, return Extract; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // There are special ways we can lower some single-element blends. if (NumV2Elements == 1) - if (SDValue V = lowerVectorShuffleAsElementInsertion( + if (SDValue V = lowerShuffleAsElementInsertion( DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; @@ -12614,29 +12594,28 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // *exact* same predicate. bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; - if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, + Zeroable, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. 
if (Subtarget.hasSSSE3()) { if (Subtarget.hasVLX()) - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; } @@ -12647,12 +12626,12 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // If we have direct support for blends, we should lower by decomposing into // a permute. That will be faster than the domain cross. if (IsBlendSupported) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask, + Subtarget, DAG); // Try to lower by permuting the inputs into an unpack instruction. - if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( - DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2, + Mask, Subtarget, DAG)) return Unpack; } @@ -12683,7 +12662,7 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// lane must shuffle the *exact* same way. In fact, you must pass a v8 Mask to /// this routine for it to work correctly. To shuffle a 256-bit or 512-bit i16 /// vector, form the analogous 128-bit 8-element Mask. -static SDValue lowerV8I16GeneralSingleInputVectorShuffle( +static SDValue lowerV8I16GeneralSingleInputShuffle( const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); @@ -12923,8 +12902,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( // Recurse back into this routine to re-compute state now that this isn't // a 3 and 1 problem. - return lowerV8I16GeneralSingleInputVectorShuffle(DL, VT, V, Mask, Subtarget, - DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG); }; if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); @@ -13182,7 +13160,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding the /// blend if only one input is used. -static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( +static SDValue lowerShuffleAsBlendOfPSHUFBs( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && @@ -13245,54 +13223,51 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs( /// the two inputs, try to interleave them. Otherwise, blend the low and high /// halves of the inputs separately (making them have relatively few inputs) /// and then concatenate them. 
-static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( - DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; }); if (NumV2Inputs == 0) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, - DAG, Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask, + Subtarget, DAG)) return Rotate; // Make a copy of the mask so it can be modified. SmallVector MutableMask(Mask.begin(), Mask.end()); - return lowerV8I16GeneralSingleInputVectorShuffle(DL, MVT::v8i16, V1, - MutableMask, Subtarget, - DAG); + return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask, + Subtarget, DAG); } assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) && @@ -13300,19 +13275,19 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, "shuffles."); // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, DAG)) return V; // There are special ways we can lower some single-element blends. 
if (NumV2Inputs == 1) - if (SDValue V = lowerVectorShuffleAsElementInsertion( + if (SDValue V = lowerShuffleAsElementInsertion( DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; @@ -13320,50 +13295,49 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // *exact* same predicate. bool IsBlendSupported = Subtarget.hasSSE41(); if (IsBlendSupported) - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; - if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask, + Subtarget, DAG)) return Rotate; if (SDValue BitBlend = - lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) + lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; // Try to lower by permuting the inputs into an unpack instruction. - if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( - DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2, + Mask, Subtarget, DAG)) return Unpack; // If we can't directly blend but can use PSHUFB, that will be better as it // can both shuffle and set up the inefficient blend. if (!IsBlendSupported && Subtarget.hasSSSE3()) { bool V1InUse, V2InUse; - return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, - Zeroable, DAG, V1InUse, V2InUse); + return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, + Zeroable, DAG, V1InUse, V2InUse); } // We can always bit-blend if we have to so the fallback strategy is to // decompose into single-input permutes and blends. 
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, + Mask, Subtarget, DAG); } /// Check whether a compaction lowering can be done by dropping even @@ -13432,9 +13406,9 @@ static int canLowerByDroppingEvenElements(ArrayRef Mask, return 0; } -static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { +static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, + ArrayRef Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements()); @@ -13452,39 +13426,38 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT, /// UNPCK to spread the i8 elements across two i16-element vectors, and uses /// the existing lowering for v8i16 blends on each half, finally PACK-ing them /// back together. -static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use a zext lowering. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( - DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; // See if we can use SSE4A Extraction / Insertion. if (Subtarget.hasSSE4A()) - if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, DAG)) return V; int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); @@ -13492,12 +13465,11 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // For single-input shuffles, there are some nicer lowering tricks we can use. if (NumV2Elements == 0) { // Check for being able to broadcast a single element. 
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; // Check whether we can widen this to an i16 shuffle by duplicating bytes. @@ -13590,13 +13562,12 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, return V; } - if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, - Zeroable, DAG)) + if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, DAG)) return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) return V; // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly @@ -13616,7 +13587,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, bool V1InUse = false; bool V2InUse = false; - SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs( + SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs( DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse); // If both V1 and V2 are in use and we can use a direct blend or an unpack, @@ -13624,8 +13595,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // important as a single pshufb is significantly faster for that. if (V1InUse && V2InUse) { if (Subtarget.hasSSE41()) - if (SDValue Blend = lowerVectorShuffleAsBlend( - DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // We can use an unpack to do the blending rather than an or in some @@ -13636,17 +13607,17 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // FIXME: It might be worth trying to detect if the unpack-feeding // shuffles will both be pshufb, in which case we shouldn't bother with // this. - if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( + if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return Unpack; // If we have VBMI we can use one VPERM instead of multiple PSHUFBs. if (Subtarget.hasVBMI() && Subtarget.hasVLX()) - return lowerVectorShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, DAG); // Use PALIGNR+Permute if possible - permute might become PSHUFB but the // PALIGNR will be cheaper than the second PSHUFB+OR. - if (SDValue V = lowerVectorShuffleAsByteRotateAndPermute( + if (SDValue V = lowerShuffleAsByteRotateAndPermute( DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) return V; } @@ -13656,13 +13627,12 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // There are special ways we can lower some single-element blends. 
if (NumV2Elements == 1) - if (SDValue V = lowerVectorShuffleAsElementInsertion( + if (SDValue V = lowerShuffleAsElementInsertion( DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return V; - if (SDValue BitBlend = - lowerVectorShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) - return BitBlend; + if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return Blend; // Check whether a compaction lowering can be done. This handles shuffles // which take every Nth element for some even N. See the helper function for @@ -13703,8 +13673,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Handle multi-input cases by blending single-input shuffles. if (NumV2Elements > 0) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v16i8, V1, V2, Mask, + Subtarget, DAG); // The fallback path for single-input shuffles widens this into two v8i16 // vectors with unpacks, shuffles those, and then pulls them back together @@ -13759,24 +13729,24 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// /// This routine breaks down the specific type of 128-bit shuffle and /// dispatches to the lowering routines accordingly. -static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { switch (VT.SimpleTy) { case MVT::v2i64: - return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v2f64: - return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i32: - return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4f32: - return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i16: - return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i8: - return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Unimplemented!"); @@ -13788,9 +13758,9 @@ static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine just extracts two subvectors, shuffles them independently, and /// then concatenates them back together. This should work effectively with all /// AVX vector shuffle types. 
-static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - SelectionDAG &DAG) { +static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + SelectionDAG &DAG) { assert(VT.getSizeInBits() >= 256 && "Only for 256-bit or wider vector shuffles!"); assert(V1.getSimpleValueType() == VT && "Bad operand type!"); @@ -13914,11 +13884,10 @@ static SDValue splitAndLowerVectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, /// between splitting the shuffle into 128-bit components and stitching those /// back together vs. extracting the single-input shuffles and blending those /// results. -static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(!V2.isUndef() && "This routine must not be used to lower single-input " "shuffles as it could then recurse on itself."); int Size = Mask.size(); @@ -13943,8 +13912,8 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, return true; }; if (DoBothBroadcast()) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, + Subtarget, DAG); // If the inputs all stem from a single 128-bit lane of each input, then we // split them rather than blending because the split will decompose to @@ -13958,12 +13927,12 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, if (Mask[i] >= 0) LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true; if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) - return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); + return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); // Otherwise, just fall back to decomposed shuffles and a blend. This requires // that the decomposed single-input shuffles don't end up here. - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, VT, V1, V2, Mask, Subtarget, + DAG); } /// Lower a vector shuffle crossing multiple 128-bit lanes as @@ -13972,9 +13941,9 @@ static SDValue lowerVectorShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, /// This is mainly for cases where we can have non-repeating permutes /// in each lane. /// -/// TODO: This is very similar to lowerVectorShuffleByMerging128BitLanes, +/// TODO: This is very similar to lowerShuffleByMerging128BitLanes, /// we should investigate merging them. -static SDValue lowerVectorShuffleAsLanePermuteAndPermute( +static SDValue lowerShuffleAsLanePermuteAndPermute( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG, const X86Subtarget &Subtarget) { int NumElts = VT.getVectorNumElements(); @@ -14029,11 +13998,9 @@ static SDValue lowerVectorShuffleAsLanePermuteAndPermute( /// is lower than any other fully general cross-lane shuffle strategy I'm aware /// of. Special cases for each particular shuffle pattern should be handled /// prior to trying this lowering. 
-static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - SelectionDAG &DAG, - const X86Subtarget &Subtarget) { +static SDValue lowerShuffleAsLanePermuteAndBlend( + const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, + SelectionDAG &DAG, const X86Subtarget &Subtarget) { // FIXME: This should probably be generalized for 512-bit vectors as well. assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); int Size = Mask.size(); @@ -14048,14 +14015,14 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) LaneCrossing[(Mask[i] % Size) / LaneSize] = true; if (!LaneCrossing[0] || !LaneCrossing[1]) - return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); + return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); } else { bool LaneUsed[2] = {false, false}; for (int i = 0; i < Size; ++i) if (Mask[i] >= 0) LaneUsed[(Mask[i] / LaneSize)] = true; if (!LaneUsed[0] || !LaneUsed[1]) - return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); + return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); } assert(V2.isUndef() && @@ -14079,11 +14046,11 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, } /// Handle lowering 2-lane 128-bit shuffles. -static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. if (Subtarget.hasAVX2() && V2.isUndef()) return SDValue(); @@ -14110,8 +14077,8 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, // instruction bytes needed to explicitly generate the zero vector. // Blends are faster and handle all the non-lane-crossing cases. - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, + Subtarget, DAG)) return Blend; // If either input operand is a zero vector, use VPERM2X128 because its mask @@ -14184,7 +14151,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1, /// position needed by the final shuffle. /// /// FIXME: This should be generalized to 512-bit shuffles. -static SDValue lowerVectorShuffleByMerging128BitLanes( +static SDValue lowerShuffleByMerging128BitLanes( const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { assert(!V2.isUndef() && "This is only useful with multiple inputs."); @@ -14351,11 +14318,10 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF. /// This allows for fast cases such as subvector extraction/insertion /// or shuffling smaller vector types which can lower more efficiently. 
-static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT, - SDValue V1, SDValue V2, - ArrayRef Mask, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert((VT.is256BitVector() || VT.is512BitVector()) && "Expected 256-bit or 512-bit vector"); @@ -14658,9 +14624,8 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( SubLaneMask); } -static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, - unsigned &ShuffleImm, - ArrayRef Mask) { +static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, + unsigned &ShuffleImm, ArrayRef Mask) { int NumElts = VT.getVectorNumElements(); assert(VT.getScalarSizeInBits() == 64 && (NumElts == 2 || NumElts == 4 || NumElts == 8) && @@ -14695,14 +14660,14 @@ static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, return false; } -static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT, - ArrayRef Mask, SDValue V1, - SDValue V2, SelectionDAG &DAG) { +static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, + ArrayRef Mask, SDValue V1, + SDValue V2, SelectionDAG &DAG) { assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64)&& "Unexpected data type for VSHUFPD"); unsigned Immediate = 0; - if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) + if (!matchShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask)) return SDValue(); return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, @@ -14713,23 +14678,22 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT, /// /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 /// isn't available. -static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable, + Subtarget, DAG)) return V; if (V2.isUndef()) { // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2, + Mask, Subtarget, DAG)) return Broadcast; // Use low duplicate instructions for masks that match their pattern. @@ -14757,27 +14721,25 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, return V; // Try to permute the lanes and then use a per-lane permute. - if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( - DL, MVT::v4f64, V1, V2, Mask, DAG, Subtarget)) + if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2, + Mask, DAG, Subtarget)) return V; // Otherwise, fall back. 
- return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v4f64, V1, V2, Mask, DAG, + Subtarget); } // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = - lowerVectorShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, Mask, V1, V2, DAG)) return Op; // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -14792,52 +14754,51 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // instruction so skip this pattern. if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)))) - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( - DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) - return Result; + if (SDValue V = lowerShuffleByMerging128BitLanes(DL, MVT::v4f64, V1, V2, + Mask, Subtarget, DAG)) + return V; // If we have VLX support, we can use VEXPAND. if (Subtarget.hasVLX()) - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; // If we have AVX2 then we always want to lower with a blend because an v4 we // can fully permute the elements. if (Subtarget.hasAVX2()) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4f64, V1, V2, Mask, + Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 4-lane 64-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v4i64 shuffling.. 
-static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); - if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable, + Subtarget, DAG)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; if (V2.isUndef()) { @@ -14861,29 +14822,28 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, } // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // If we have VLX support, we can use VALIGN or VEXPAND. if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v4i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; } // Try to use PALIGNR. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) return V; // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -14898,35 +14858,34 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, // instruction so skip this pattern. if (!isShuffleMaskInputInPlace(0, Mask) && !isShuffleMaskInputInPlace(1, Mask)) - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( + if (SDValue Result = lowerShuffleByMerging128BitLanes( DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v4i64, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit floating point shuffles. 
/// /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 /// isn't available. -static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane, we have many more @@ -14947,13 +14906,12 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) return V; // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. - return lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG); + return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG); } // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -14973,49 +14931,48 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); // Otherwise, fall back. - return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, + DAG, Subtarget); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( + if (SDValue Result = lowerShuffleByMerging128BitLanes( DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) return Result; // If we have VLX support, we can use VEXPAND. if (Subtarget.hasVLX()) - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; // For non-AVX512 if the Mask is of 16bit elements in lane then try to split // since after split we get a more efficient code using vpunpcklwd and // vpunpckhwd instrs than vblend. if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) - if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, + Subtarget, DAG)) return V; // If we have AVX2 then we always want to lower with a blend because at v8 we // can fully permute the elements. 
if (Subtarget.hasAVX2()) - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8f32, V1, V2, Mask, + Subtarget, DAG); // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 8-lane 32-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v8i32 shuffling.. -static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); @@ -15024,8 +14981,8 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( - DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; // For non-AVX512 if the Mask is of 16bit elements in lane then try to split @@ -15033,17 +14990,17 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // vpunpcklwd and vpunpckhwd instrs. if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && !Subtarget.hasAVX512()) - if (SDValue V = lowerVectorShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue V = lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, + Subtarget, DAG)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; // If the shuffle mask is repeated in each 128-bit lane we can use more @@ -15059,30 +15016,29 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) return V; } // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // If we have VLX support, we can use VALIGN or EXPAND. 
if (Subtarget.hasVLX()) { - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; } // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -15104,31 +15060,30 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1); SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2); - SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, - CastV1, CastV2, DAG); + SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, + CastV1, CastV2, DAG); return DAG.getBitcast(MVT::v8i32, ShufPS); } // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( + if (SDValue Result = lowerShuffleByMerging128BitLanes( DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) return Result; // Otherwise fall back on generic blend lowering. - return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, - Mask, Subtarget, DAG); + return lowerShuffleAsDecomposedShuffleBlend(DL, MVT::v8i32, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 16-lane 16-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v16i16 shuffling.. -static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -15137,37 +15092,36 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. 
- if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -15180,12 +15134,12 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // There are no generalized cross-lane shuffle operations available on i16 // element types. if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) { - if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( + if (SDValue V = lowerShuffleAsLanePermuteAndPermute( DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, - Mask, DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v16i16, V1, V2, Mask, + DAG, Subtarget); } SmallVector RepeatedMask; @@ -15193,44 +15147,43 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v16 case. - return lowerV8I16GeneralSingleInputVectorShuffle( + return lowerV8I16GeneralSingleInputShuffle( DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); } } - if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( - DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG)) + if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2, + Zeroable, Subtarget, DAG)) return PSHUFB; // AVX512BWVL can lower to VPERMW. if (Subtarget.hasBWI() && Subtarget.hasVLX()) - return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( + if (SDValue Result = lowerShuffleByMerging128BitLanes( DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) return Result; // Try to permute the lanes and then use a per-lane permute. 
- if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( + if (SDValue V = lowerShuffleAsLanePermuteAndPermute( DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, + Subtarget, DAG); } /// Handle lowering of 32-lane 8-bit integer shuffles. /// /// This routine is only called when we have AVX2 and thus a reasonable /// instruction set for v32i8 shuffling.. -static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); @@ -15239,37 +15192,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( - DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return ZExt; // Check for being able to broadcast a single element. - if (SDValue Broadcast = lowerVectorShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Try to create an in-lane repeating shuffle mask and then shuffle the @@ -15281,36 +15233,36 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // There are no generalized cross-lane shuffle operations available on i8 // element types. 
if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) { - if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( + if (SDValue V = lowerShuffleAsLanePermuteAndPermute( DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; - return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, - DAG, Subtarget); + return lowerShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask, DAG, + Subtarget); } - if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( - DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG)) + if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2, + Zeroable, Subtarget, DAG)) return PSHUFB; // AVX512VBMIVL can lower to VPERMB. if (Subtarget.hasVBMI() && Subtarget.hasVLX()) - return lowerVectorShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, DAG); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. - if (SDValue Result = lowerVectorShuffleByMerging128BitLanes( + if (SDValue Result = lowerShuffleByMerging128BitLanes( DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) return Result; // Try to permute the lanes and then use a per-lane permute. - if (SDValue V = lowerVectorShuffleAsLanePermuteAndPermute( + if (SDValue V = lowerShuffleAsLanePermuteAndPermute( DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) return V; // Otherwise fall back on generic lowering. - return lowerVectorShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, - Subtarget, DAG); + return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, + Subtarget, DAG); } /// High-level routine to lower various 256-bit x86 vector shuffles. @@ -15318,24 +15270,23 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine either breaks down the specific type of a 256-bit x86 vector /// shuffle or splits it into two 128-bit shuffles and fuses the results back /// together based on the available instructions. -static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef Mask, MVT VT, + SDValue V1, SDValue V2, const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. int NumElts = VT.getVectorNumElements(); int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); if (NumV2Elements == 1 && Mask[0] >= NumElts) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Handle special cases where the lower or upper half is UNDEF. if (SDValue V = - lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) + lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; // There is a really nice hard cut-over between AVX1 and AVX2 that means we @@ -15349,12 +15300,12 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, if (ElementBits < 32) { // No floating point type available, if we can't use the bit operations // for masking/blending then decompose into 128-bit vectors. 
- if (SDValue V = - lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG)) + if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, + DAG)) return V; - if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) + if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) return V; - return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); + return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); } MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), @@ -15366,17 +15317,17 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, switch (VT.SimpleTy) { case MVT::v4f64: - return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v4i64: - return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8f32: - return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i32: - return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i16: - return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i8: - return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 256-bit x86 vector type!"); @@ -15384,12 +15335,10 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, } /// Try to lower a vector shuffle as a 128-bit shuffles. -static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, - ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(VT.getScalarSizeInBits() == 64 && "Unexpected element type size for 128bit shuffle."); @@ -15486,11 +15435,10 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT, } /// Handle lowering of 8-lane 64-bit floating point shuffles. 
-static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); @@ -15517,37 +15465,33 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); } - if (SDValue Shuf128 = - lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, Zeroable, V1, V2, - Subtarget, DAG)) + if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1, + V2, Subtarget, DAG)) return Shuf128; - if (SDValue Unpck = - lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) + if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Unpck; // Check if the blend happens to exactly fit that of SHUFPD. - if (SDValue Op = - lowerVectorShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) + if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, Mask, V1, V2, DAG)) return Op; - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, - V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; - return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG); } /// Handle lowering of 16-lane 32-bit floating point shuffles. -static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -15569,16 +15513,15 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue Unpck = - lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) - return Unpck; + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) + return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // Otherwise, fall back to a SHUFPS sequence. 
- return lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG); + return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG); } // If we have a single input shuffle with different shuffle patterns in the @@ -15590,19 +15533,18 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef Mask, } // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, V1, V2, DAG, Subtarget)) return V; - return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// Handle lowering of 8-lane 64-bit integer shuffles. -static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); @@ -15628,47 +15570,44 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG)); } - if (SDValue Shuf128 = - lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, Zeroable, - V1, V2, Subtarget, DAG)) + if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1, + V2, Subtarget, DAG)) return Shuf128; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v8i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Try to use PALIGNR. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue Unpck = - lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) + if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) return Unpck; // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, - V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; - return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// Handle lowering of 16-lane 32-bit integer shuffles. 
-static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); @@ -15676,7 +15615,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; @@ -15693,25 +15632,24 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) return V; } // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use VALIGN. - if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2, - Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsRotate(DL, MVT::v16i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Try to use byte rotation instructions. if (Subtarget.hasBWI()) - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask, + Subtarget, DAG)) return Rotate; // Assume that a single SHUFPS is faster than using a permv shuffle. @@ -15719,27 +15657,26 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef Mask, if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); - SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, - CastV1, CastV2, DAG); + SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, + CastV1, CastV2, DAG); return DAG.getBitcast(MVT::v16i32, ShufPS); } // If we have AVX512F support, we can use VEXPAND. - if (SDValue V = lowerVectorShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, - V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2, + DAG, Subtarget)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; - return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG); } /// Handle lowering of 32-lane 16-bit integer shuffles. 
-static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); @@ -15748,23 +15685,22 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) return V; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v32i16, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask, + Subtarget, DAG)) return Rotate; if (V2.isUndef()) { @@ -15773,28 +15709,27 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef Mask, // As this is a single-input shuffle, the repeated mask should be // a strictly valid v8i16 mask that we can pass through to the v8i16 // lowering to handle even the v32 case. - return lowerV8I16GeneralSingleInputVectorShuffle( + return lowerV8I16GeneralSingleInputShuffle( DL, MVT::v32i16, V1, RepeatedMask, Subtarget, DAG); } } - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Blend; - if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( - DL, MVT::v32i16, Mask, V1, V2, Zeroable, Subtarget, DAG)) + if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, + Zeroable, Subtarget, DAG)) return PSHUFB; - return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG); } /// Handle lowering of 64-lane 8-bit integer shuffles. 
-static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, - const APInt &Zeroable, - SDValue V1, SDValue V2, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef Mask, + const APInt &Zeroable, SDValue V1, SDValue V2, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); @@ -15803,37 +15738,36 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, // Whenever we can lower this as a zext, that instruction is strictly faster // than any alternative. It also allows us to fold memory operands into the // shuffle in many cases. - if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend( + if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) return ZExt; // Use dedicated unpack instructions for masks that match their pattern. - if (SDValue V = - lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) + if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) return V; // Use dedicated pack instructions for masks that match their pattern. - if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, - Subtarget)) + if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, + Subtarget)) return V; // Try to use shift instructions. - if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Shift; // Try to use byte rotation instructions. - if (SDValue Rotate = lowerVectorShuffleAsByteRotate( - DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask, + Subtarget, DAG)) return Rotate; - if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB( - DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG)) + if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2, + Zeroable, Subtarget, DAG)) return PSHUFB; // VBMI can use VPERMV/VPERMV3 byte shuffles. if (Subtarget.hasVBMI()) - return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); + return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG); // Try to create an in-lane repeating shuffle mask and then shuffle the // results into the target lanes. @@ -15841,12 +15775,12 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) return V; - if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, - Zeroable, Subtarget, DAG)) + if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, + Zeroable, Subtarget, DAG)) return Blend; // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); + return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); } /// High-level routine to lower various 512-bit x86 vector shuffles. @@ -15854,11 +15788,11 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef Mask, /// This routine either breaks down the specific type of a 512-bit x86 vector /// shuffle or splits it into two 256-bit shuffles and fuses the results back /// together based on the available instructions. 
-static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/ basic ISA!"); @@ -15868,18 +15802,18 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); if (NumV2Elements == 1 && Mask[0] >= NumElts) - if (SDValue Insertion = lowerVectorShuffleAsElementInsertion( + if (SDValue Insertion = lowerShuffleAsElementInsertion( DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) return Insertion; // Handle special cases where the lower or upper half is UNDEF. if (SDValue V = - lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) + lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) return V; // Check for being able to broadcast a single element. - if (SDValue Broadcast = - lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; // Dispatch to each element type for lowering. If we don't have support for @@ -15888,17 +15822,17 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, // the requisite ISA extensions for that element type are available. switch (VT.SimpleTy) { case MVT::v8f64: - return lowerV8F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16f32: - return lowerV16F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v8i64: - return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v16i32: - return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v32i16: - return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); case MVT::v64i8: - return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); + return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); default: llvm_unreachable("Not a valid 512-bit x86 vector type!"); @@ -15907,7 +15841,7 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, // Determine if this shuffle can be implemented with a KSHIFT instruction. // Returns the shift amount if possible or -1 if not. This is a simplified -// version of matchVectorShuffleAsShift. +// version of matchShuffleAsShift. static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef Mask, int MaskOffset, const APInt &Zeroable) { int Size = Mask.size(); @@ -15942,11 +15876,11 @@ static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef Mask, // There is no a dedicated instruction on AVX-512 that shuffles the masks. // The only way to shuffle bits is to sign-extend the mask vector to SIMD // vector, shuffle and then truncate it back. 
-static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef Mask, - MVT VT, SDValue V1, SDValue V2, - const APInt &Zeroable, - const X86Subtarget &Subtarget, - SelectionDAG &DAG) { +static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, + MVT VT, SDValue V1, SDValue V2, + const APInt &Zeroable, + const X86Subtarget &Subtarget, + SelectionDAG &DAG) { assert(Subtarget.hasAVX512() && "Cannot lower 512-bit vectors w/o basic ISA!"); @@ -16181,8 +16115,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // by obfuscating the operands with bitcasts. // TODO: Avoid lowering directly from this top-level function: make this // a query (canLowerAsBroadcast) and defer lowering to the type-based calls. - if (SDValue Broadcast = - lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG)) + if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, + Subtarget, DAG)) return Broadcast; MVT NewEltVT = VT.isFloatingPoint() @@ -16220,26 +16154,21 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, if (canonicalizeShuffleMaskWithCommute(Mask)) return DAG.getCommutedVectorShuffle(*SVOp); - if (SDValue V = - lowerVectorShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget)) + if (SDValue V = lowerShuffleWithVPMOV(DL, Mask, VT, V1, V2, DAG, Subtarget)) return V; // For each vector width, delegate to a specialized lowering routine. if (VT.is128BitVector()) - return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, - DAG); + return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (VT.is256BitVector()) - return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, - DAG); + return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (VT.is512BitVector()) - return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, - DAG); + return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); if (Is1BitVector) - return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, - DAG); + return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); llvm_unreachable("Unimplemented!"); } @@ -30422,12 +30351,11 @@ SDValue X86TargetLowering::unwrapAddress(SDValue N) const { // Attempt to match a combined shuffle mask against supported unary shuffle // instructions. // TODO: Investigate sharing more of this with shuffle lowering. -static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, - bool AllowFloatDomain, bool AllowIntDomain, - SDValue &V1, const SDLoc &DL, - SelectionDAG &DAG, - const X86Subtarget &Subtarget, - unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) { +static bool matchUnaryShuffle(MVT MaskVT, ArrayRef Mask, + bool AllowFloatDomain, bool AllowIntDomain, + SDValue &V1, const SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget, unsigned &Shuffle, + MVT &SrcVT, MVT &DstVT) { unsigned NumMaskElts = Mask.size(); unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); @@ -30559,13 +30487,12 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef Mask, // Attempt to match a combined shuffle mask against supported unary immediate // permute instructions. // TODO: Investigate sharing more of this with shuffle lowering. 
-static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask,
-                                           const APInt &Zeroable,
-                                           bool AllowFloatDomain,
-                                           bool AllowIntDomain,
-                                           const X86Subtarget &Subtarget,
-                                           unsigned &Shuffle, MVT &ShuffleVT,
-                                           unsigned &PermuteImm) {
+static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef Mask,
+                                     const APInt &Zeroable,
+                                     bool AllowFloatDomain, bool AllowIntDomain,
+                                     const X86Subtarget &Subtarget,
+                                     unsigned &Shuffle, MVT &ShuffleVT,
+                                     unsigned &PermuteImm) {
   unsigned NumMaskElts = Mask.size();
   unsigned InputSizeInBits = MaskVT.getSizeInBits();
   unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
@@ -30666,9 +30593,8 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask,
   // FIXME: Add 512-bit support.
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
                          (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
-    int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
-                                             MaskScalarSizeInBits, Mask,
-                                             0, Zeroable, Subtarget);
+    int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
+                                       Mask, 0, Zeroable, Subtarget);
     if (0 < ShiftAmt) {
       PermuteImm = (unsigned)ShiftAmt;
       return true;
@@ -30681,13 +30607,12 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef Mask,
 // Attempt to match a combined unary shuffle mask against supported binary
 // shuffle instructions.
 // TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask,
-                                     bool AllowFloatDomain, bool AllowIntDomain,
-                                     SDValue &V1, SDValue &V2, const SDLoc &DL,
-                                     SelectionDAG &DAG,
-                                     const X86Subtarget &Subtarget,
-                                     unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
-                                     bool IsUnary) {
+static bool matchBinaryShuffle(MVT MaskVT, ArrayRef Mask,
+                               bool AllowFloatDomain, bool AllowIntDomain,
+                               SDValue &V1, SDValue &V2, const SDLoc &DL,
+                               SelectionDAG &DAG, const X86Subtarget &Subtarget,
+                               unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
+                               bool IsUnary) {
   unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
 
   if (MaskVT.is128BitVector()) {
@@ -30748,7 +30673,7 @@ static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef Mask,
   return false;
 }
 
-static bool matchBinaryPermuteVectorShuffle(
+static bool matchBinaryPermuteShuffle(
     MVT MaskVT, ArrayRef Mask, const APInt &Zeroable, bool AllowFloatDomain,
     bool AllowIntDomain, SDValue &V1, SDValue &V2, const SDLoc &DL,
     SelectionDAG &DAG, const X86Subtarget &Subtarget,
@@ -30759,7 +30684,7 @@ static bool matchBinaryPermuteVectorShuffle(
   // Attempt to match against PALIGNR byte rotate.
   if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
                          (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
-    int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
+    int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask);
     if (0 < ByteRotation) {
       Shuffle = X86ISD::PALIGNR;
       ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
@@ -30832,7 +30757,7 @@ static bool matchBinaryPermuteVectorShuffle(
   if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
       MaskVT.is128BitVector()) {
     if (Zeroable.getBoolValue() &&
-        matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+        matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
       Shuffle = X86ISD::INSERTPS;
       ShuffleVT = MVT::v4f32;
       return true;
@@ -30844,7 +30769,7 @@ static bool matchBinaryPermuteVectorShuffle(
       ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
-    if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+    if (matchShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
      Shuffle = X86ISD::SHUFP;
       ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64);
       return true;
@@ -31036,9 +30961,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
   }
 
   SDValue NewV1 = V1; // Save operand in case early exit happens.
-  if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                              NewV1, DL, DAG, Subtarget, Shuffle,
-                              ShuffleSrcVT, ShuffleVT) &&
+  if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+                        DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+                        ShuffleVT) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
@@ -31047,9 +30972,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
     return DAG.getBitcast(RootVT, Res);
   }
 
-  if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
-                                     AllowIntDomain, Subtarget, Shuffle,
-                                     ShuffleVT, PermuteImm) &&
+  if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
+                               AllowIntDomain, Subtarget, Shuffle, ShuffleVT,
+                               PermuteImm) &&
      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
@@ -31062,9 +30987,9 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 
   SDValue NewV1 = V1; // Save operands in case early exit happens.
   SDValue NewV2 = V2;
-  if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                               NewV1, NewV2, DL, DAG, Subtarget, Shuffle,
-                               ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
+  if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1,
+                         NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
+                         ShuffleVT, UnaryShuffle) &&
      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
@@ -31076,7 +31001,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
 
   NewV1 = V1; // Save operands in case early exit happens.
   NewV2 = V2;
-  if (matchBinaryPermuteVectorShuffle(
+  if (matchBinaryPermuteShuffle(
           MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1,
           NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) &&
       (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
@@ -31096,8 +31021,8 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
   // Annoyingly, SSE4A instructions don't map into the above match helpers.
   if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) {
     uint64_t BitLen, BitIdx;
-    if (matchVectorShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
-                                  Zeroable)) {
+    if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx,
+                            Zeroable)) {
       if (Depth == 1 && Root.getOpcode() == X86ISD::EXTRQI)
         return SDValue(); // Nothing to do!
       V1 = DAG.getBitcast(IntMaskVT, V1);
@@ -31107,7 +31032,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root,
       return DAG.getBitcast(RootVT, Res);
     }
 
-    if (matchVectorShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
+    if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) {
      if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTQI)
        return SDValue(); // Nothing to do!
      V1 = DAG.getBitcast(IntMaskVT, V1);