[X86] Use APInt instead of SmallBitVector for tracking Zeroable elements in shuffle...

author Craig Topper <craig.topper@gmail.com>
Mon, 27 Feb 2017 16:15:30 +0000 (16:15 +0000)
committer Craig Topper <craig.topper@gmail.com>
Mon, 27 Feb 2017 16:15:30 +0000 (16:15 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d27b53abe7087c60b5c416f306ddf117ae4fe2a6..b471a99b4645449ff4fab8f3e62b1d1460070637 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -8060,9 +8060,9 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
  /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle
  /// as many lanes with this technique as possible to simplify the remaining
  /// shuffle.
-static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
-                                                     SDValue V1, SDValue V2) {
-  SmallBitVector Zeroable(Mask.size(), false);
+static APInt computeZeroableShuffleElements(ArrayRef<int> Mask,
+                                            SDValue V1, SDValue V2) {
+  APInt Zeroable(Mask.size(), 0);
    V1 = peekThroughBitcasts(V1);
    V2 = peekThroughBitcasts(V2);
  
@@ -8077,7 +8077,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
      int M = Mask[i];
      // Handle the easy cases.
      if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
-      Zeroable[i] = true;
+      Zeroable.setBit(i);
        continue;
      }
  
@@ -8095,17 +8095,19 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
        int Scale = Size / V->getNumOperands();
        SDValue Op = V.getOperand(M / Scale);
        if (Op.isUndef() || X86::isZeroNode(Op))
-        Zeroable[i] = true;
+        Zeroable.setBit(i);
        else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
          APInt Val = Cst->getAPIntValue();
          Val = Val.lshr((M % Scale) * ScalarSizeInBits);
          Val = Val.getLoBits(ScalarSizeInBits);
-        Zeroable[i] = (Val == 0);
+        if (Val == 0)
+          Zeroable.setBit(i);
        } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
          APInt Val = Cst->getValueAPF().bitcastToAPInt();
          Val = Val.lshr((M % Scale) * ScalarSizeInBits);
          Val = Val.getLoBits(ScalarSizeInBits);
-        Zeroable[i] = (Val == 0);
+        if (Val == 0)
+          Zeroable.setBit(i);
        }
        continue;
      }
@@ -8119,7 +8121,8 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
          SDValue Op = V.getOperand((M * Scale) + j);
          AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
        }
-      Zeroable[i] = AllZeroable;
+      if (AllZeroable)
+        Zeroable.setBit(i);
        continue;
      }
    }
@@ -8134,12 +8137,12 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
  //
  // The function looks for a sub-mask that the nonzero elements are in
  // increasing order. If such sub-mask exist. The function returns true.
-static bool isNonZeroElementsInOrder(const SmallBitVector &Zeroable,
+static bool isNonZeroElementsInOrder(const APInt &Zeroable,
                                       ArrayRef<int> Mask, const EVT &VectorType,
                                       bool &IsZeroSideLeft) {
    int NextElement = -1;
    // Check if the Mask's nonzero elements are in increasing order.
-  for (int i = 0, e = Zeroable.size(); i < e; i++) {
+  for (int i = 0, e = Mask.size(); i < e; i++) {
      // Checks if the mask's zeros elements are built from only zeros.
      assert(Mask[i] >= -1 && "Out of bound mask element!");
      if (Mask[i] < 0)
@@ -8163,7 +8166,7 @@ static bool isNonZeroElementsInOrder(const SmallBitVector &Zeroable,
  static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                                              ArrayRef<int> Mask, SDValue V1,
                                              SDValue V2,
-                                            const SmallBitVector &Zeroable,
+                                            const APInt &Zeroable,
                                              const X86Subtarget &Subtarget,
                                              SelectionDAG &DAG) {
    int Size = Mask.size();
@@ -8218,19 +8221,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                             const X86Subtarget &Subtarget, SelectionDAG &DAG,
                             const SDLoc &dl);
  
-// Function convertBitVectorToUnsigned - The function gets SmallBitVector
-// as argument and convert him to unsigned.
-// The output of the function is not(zeroable)
-static unsigned convertBitVectorToUnsigned(const SmallBitVector &Zeroable) {
-  unsigned convertBit = 0;
-  for (int i = 0, e = Zeroable.size(); i < e; i++)
-    convertBit |= !(Zeroable[i]) << i;
-  return convertBit;
-}
-
  // X86 has dedicated shuffle that can be lowered to VEXPAND
  static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
-                                          const SmallBitVector &Zeroable,
+                                          const APInt &Zeroable,
                                            ArrayRef<int> Mask, SDValue &V1,
                                            SDValue &V2, SelectionDAG &DAG,
                                            const X86Subtarget &Subtarget) {
@@ -8238,7 +8231,7 @@ static SDValue lowerVectorShuffleToEXPAND(const SDLoc &DL, MVT VT,
    if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                  IsLeftZeroSide))
      return SDValue();
-  unsigned VEXPANDMask = convertBitVectorToUnsigned(Zeroable);
+  unsigned VEXPANDMask = (~Zeroable).getZExtValue();
    MVT IntegerType =
        MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
    SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
@@ -8372,7 +8365,7 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
  /// one of the inputs being zeroable.
  static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
                                             SDValue V2, ArrayRef<int> Mask,
-                                           const SmallBitVector &Zeroable,
+                                           const APInt &Zeroable,
                                             SelectionDAG &DAG) {
    assert(!VT.isFloatingPoint() && "Floating point types are not supported");
    MVT EltVT = VT.getVectorElementType();
@@ -8441,7 +8434,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
  /// that the shuffle mask is a blend, or convertible into a blend with zero.
  static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Original,
-                                         const SmallBitVector &Zeroable,
+                                         const APInt &Zeroable,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
    bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
@@ -8899,7 +8892,7 @@ static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
  static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                                       unsigned ScalarSizeInBits,
                                       ArrayRef<int> Mask, int MaskOffset,
-                                     const SmallBitVector &Zeroable,
+                                     const APInt &Zeroable,
                                       const X86Subtarget &Subtarget) {
    int Size = Mask.size();
    unsigned SizeInBits = Size * ScalarSizeInBits;
@@ -8961,7 +8954,7 @@ static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
  
  static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Mask,
-                                         const SmallBitVector &Zeroable,
+                                         const APInt &Zeroable,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
    int Size = Mask.size();
@@ -8997,12 +8990,12 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
  /// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
  static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
                                             SDValue V2, ArrayRef<int> Mask,
-                                           const SmallBitVector &Zeroable,
+                                           const APInt &Zeroable,
                                             SelectionDAG &DAG) {
    int Size = Mask.size();
    int HalfSize = Size / 2;
    assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
-  assert(!Zeroable.all() && "Fully zeroable shuffle mask");
+  assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
  
    // Upper half must be undefined.
    if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -9300,7 +9293,7 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
  /// are both incredibly common and often quite performance sensitive.
  static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
      const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+    const APInt &Zeroable, const X86Subtarget &Subtarget,
      SelectionDAG &DAG) {
    int Bits = VT.getSizeInBits();
    int NumLanes = Bits / 128;
@@ -9456,7 +9449,7 @@ static bool isShuffleFoldableLoad(SDValue V) {
  /// across all subtarget feature sets.
  static SDValue lowerVectorShuffleAsElementInsertion(
      const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+    const APInt &Zeroable, const X86Subtarget &Subtarget,
      SelectionDAG &DAG) {
    MVT ExtVT = VT;
    MVT EltVT = VT.getVectorElementType();
@@ -9810,7 +9803,7 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
  // elements are zeroable.
  static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
                                           unsigned &InsertPSMask,
-                                         const SmallBitVector &Zeroable,
+                                         const APInt &Zeroable,
                                           ArrayRef<int> Mask,
                                           SelectionDAG &DAG) {
    assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
@@ -9899,7 +9892,7 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
  
  static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
                                              SDValue V2, ArrayRef<int> Mask,
-                                            const SmallBitVector &Zeroable,
+                                            const APInt &Zeroable,
                                              SelectionDAG &DAG) {
    assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
    assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
@@ -10034,7 +10027,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
  /// it is better to avoid lowering through this for integer vectors where
  /// possible.
  static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -10116,7 +10109,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// it falls back to the floating point shuffle operation with appropriate bit
  /// casting.
  static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -10335,7 +10328,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
  /// domain crossing penalties, as these are sufficient to implement all v4f32
  /// shuffles.
  static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -10418,7 +10411,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// We try to handle these with integer-domain shuffles where we can, but for
  /// blends we use the floating point domain blend instructions.
  static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -10985,7 +10978,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
  /// blend if only one input is used.
  static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
      const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
-    const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse,
+    const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse,
      bool &V2InUse) {
    SDValue V1Mask[16];
    SDValue V2Mask[16];
@@ -11046,7 +11039,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
  /// halves of the inputs separately (making them have relatively few inputs)
  /// and then concatenate them.
  static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -11230,7 +11223,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
  /// the existing lowering for v8i16 blends on each half, finally PACK-ing them
  /// back together.
  static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -11519,7 +11512,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// dispatches to the lowering routines accordingly.
  static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
    switch (VT.SimpleTy) {
@@ -11775,7 +11768,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
  /// \brief Handle lowering 2-lane 128-bit shuffles.
  static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                          SDValue V2, ArrayRef<int> Mask,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
    SmallVector<int, 4> WidenedMask;
@@ -12310,7 +12303,7 @@ static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
  /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
  /// isn't available.
  static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -12407,7 +12400,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// This routine is only called when we have AVX2 and thus a reasonable
  /// instruction set for v4i64 shuffling..
  static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -12495,7 +12488,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
  /// isn't available.
  static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -12586,7 +12579,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// This routine is only called when we have AVX2 and thus a reasonable
  /// instruction set for v8i32 shuffling..
  static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -12690,7 +12683,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// This routine is only called when we have AVX2 and thus a reasonable
  /// instruction set for v16i16 shuffling..
  static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          SDValue V1, SDValue V2,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
@@ -12776,7 +12769,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// This routine is only called when we have AVX2 and thus a reasonable
  /// instruction set for v32i8 shuffling..
  static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -12849,7 +12842,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// together based on the available instructions.
  static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
    // If we have a single input to the zero element, insert that into V1 if we
@@ -13001,7 +12994,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
  
  /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
  static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -13057,7 +13050,7 @@ static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  
  /// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
  static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          SDValue V1, SDValue V2,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
@@ -13103,7 +13096,7 @@ static SDValue lowerV16F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  
  /// \brief Handle lowering of 8-lane 64-bit integer shuffles.
  static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -13168,7 +13161,7 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  
  /// \brief Handle lowering of 16-lane 32-bit integer shuffles.
  static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          SDValue V1, SDValue V2,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
@@ -13239,7 +13232,7 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  
  /// \brief Handle lowering of 32-lane 16-bit integer shuffles.
  static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          SDValue V1, SDValue V2,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
@@ -13290,7 +13283,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  
  /// \brief Handle lowering of 64-lane 8-bit integer shuffles.
  static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
-                                       const SmallBitVector &Zeroable,
+                                       const APInt &Zeroable,
                                         SDValue V1, SDValue V2,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
@@ -13350,7 +13343,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
  /// together based on the available instructions.
  static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                          MVT VT, SDValue V1, SDValue V2,
-                                        const SmallBitVector &Zeroable,
+                                        const APInt &Zeroable,
                                          const X86Subtarget &Subtarget,
                                          SelectionDAG &DAG) {
    assert(Subtarget.hasAVX512() &&
@@ -13572,8 +13565,8 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
    // We actually see shuffles that are entirely re-arrangements of a set of
    // zero inputs. This mostly happens while decomposing complex shuffles into
    // simple ones. Directly lower these as a buildvector of zeros.
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-  if (Zeroable.all())
+  APInt Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+  if (Zeroable.isAllOnesValue())
      return getZeroVector(VT, Subtarget, DAG, DL);
  
    // Try to collapse shuffles into using a vector type with fewer elements but
@@ -26541,10 +26534,11 @@ static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
    unsigned NumMaskElts = Mask.size();
  
    bool ContainsZeros = false;
-  SmallBitVector Zeroable(NumMaskElts, false);
+  APInt Zeroable(NumMaskElts, false);
    for (unsigned i = 0; i != NumMaskElts; ++i) {
      int M = Mask[i];
-    Zeroable[i] = isUndefOrZero(M);
+    if (isUndefOrZero(M))
+      Zeroable.setBit(i);
      ContainsZeros |= (M == SM_SentinelZero);
    }
  
@@ -26825,12 +26819,12 @@ static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
    // Attempt to combine to INSERTPS.
    if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() &&
        MaskVT.is128BitVector()) {
-    SmallBitVector Zeroable(4, false);
+    APInt Zeroable(4, 0);
      for (unsigned i = 0; i != NumMaskElts; ++i)
        if (Mask[i] < 0)
-        Zeroable[i] = true;
+        Zeroable.setBit(i);
  
-    if (Zeroable.any() &&
+    if (Zeroable.getBoolValue() &&
          matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
        Shuffle = X86ISD::INSERTPS;
        ShuffleVT = MVT::v4f32;
author	Craig Topper <craig.topper@gmail.com>
	Mon, 27 Feb 2017 16:15:30 +0000 (16:15 +0000)
committer	Craig Topper <craig.topper@gmail.com>
	Mon, 27 Feb 2017 16:15:30 +0000 (16:15 +0000)