From 437e16fc51c3648388f96c8633d57bb3e2ee5933 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 19 Jun 2016 15:37:33 +0000 Subject: [PATCH] [X86] Pre-allocate a SmallVector instead of using push_back in a loop. NFC git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273114 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bebfbf28535..1202549abb4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10561,26 +10561,27 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT, SelectionDAG &DAG) { // FIXME: This should probably be generalized for 512-bit vectors as well. assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); - int LaneSize = Mask.size() / 2; + int Size = Mask.size(); + int LaneSize = Size / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be // less expensive. The flags track whether the given lane contains an element // that crosses to another lane. bool LaneCrossing[2] = {false, false}; - for (int i = 0, Size = Mask.size(); i < Size; ++i) + for (int i = 0; i < Size; ++i) if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) LaneCrossing[(Mask[i] % Size) / LaneSize] = true; if (!LaneCrossing[0] || !LaneCrossing[1]) return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG); if (isSingleInputShuffleMask(Mask)) { - SmallVector FlippedBlendMask; - for (int i = 0, Size = Mask.size(); i < Size; ++i) - FlippedBlendMask.push_back( + SmallVector FlippedBlendMask(Size); + for (int i = 0; i < Size; ++i) + FlippedBlendMask[i] = Mask[i] < 0 ? -1 : (((Mask[i] % Size) / LaneSize == i / LaneSize) ? Mask[i] : Mask[i] % LaneSize + - (i / LaneSize) * LaneSize + Size)); + (i / LaneSize) * LaneSize + Size); // Flip the vector, and blend the results which should now be in-lane. The // VPERM2X128 mask uses the low 2 bits for the low source and bits 4 and -- 2.50.1