/// icmp instruction with zero, and we have an 'and' with the non-constant value
/// and a power of two we can turn the select into a shift on the result of the
/// 'and'.
-static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC,
+static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC,
APInt TrueVal, APInt FalseVal,
InstCombiner::BuilderTy &Builder) {
- if (!IC->isEquality() || !SI.getType()->isIntegerTy())
+ assert(SelType->isIntOrIntVectorTy() && "Not an integer select?");
+
+ // If this is a vector select, we need a vector compare.
+ if (SelType->isVectorTy() != IC->getType()->isVectorTy())
+ return nullptr;
+
+ if (!IC->isEquality())
return nullptr;
if (!match(IC->getOperand(1), m_Zero()))
return nullptr;
- ConstantInt *AndRHS;
+ const APInt *AndRHS;
Value *LHS = IC->getOperand(0);
- if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
+ if (!match(LHS, m_And(m_Value(), m_Power2(AndRHS))))
return nullptr;
// If both select arms are non-zero see if we have a select of the form
FalseVal -= Offset;
}
- // Make sure the mask in the 'and' and one of the select arms is a power of 2.
- if (!AndRHS->getValue().isPowerOf2() ||
- (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()))
+ // Make sure one of the select arms is a power of 2.
+ if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())
return nullptr;
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal;
unsigned ValZeros = ValC.logBase2();
- unsigned AndZeros = AndRHS->getValue().logBase2();
+ unsigned AndZeros = AndRHS->logBase2();
// If types don't match we can still convert the select by introducing a zext
// or a trunc of the 'and'.
Value *V = LHS;
if (ValZeros > AndZeros) {
- V = Builder.CreateZExtOrTrunc(V, SI.getType());
+ V = Builder.CreateZExtOrTrunc(V, SelType);
V = Builder.CreateShl(V, ValZeros - AndZeros);
} else if (ValZeros < AndZeros) {
V = Builder.CreateLShr(V, AndZeros - ValZeros);
- V = Builder.CreateZExtOrTrunc(V, SI.getType());
+ V = Builder.CreateZExtOrTrunc(V, SelType);
} else
- V = Builder.CreateZExtOrTrunc(V, SI.getType());
+ V = Builder.CreateZExtOrTrunc(V, SelType);
// Okay, now we know that everything is set up, we just don't know whether we
// have a icmp_ne or icmp_eq and whether the true or false val is the zero.
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
- if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal))
- if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal))
- if (Value *V = foldSelectICmpAnd(SI, ICI, TrueValC->getValue(),
- FalseValC->getValue(), Builder))
+ {
+ const APInt *TrueValC, *FalseValC;
+ if (match(TrueVal, m_APInt(TrueValC)) &&
+ match(FalseVal, m_APInt(FalseValC)))
+ if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC,
+ *FalseValC, Builder))
return replaceInstUsesWith(SI, V);
+ }
if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder))
return NewSel;
ret i32 %3
}
+define <2 x i32> @test65vec(<2 x i64> %x) {
+; CHECK-LABEL: @test65vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 16, i64 16>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 3, i64 3>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
+; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %1 = and <2 x i64> %x, <i64 16, i64 16>
+ %2 = icmp ne <2 x i64> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
define i32 @test66(i64 %x) {
; CHECK-LABEL: @test66(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 31
ret i32 %3
}
+define <2 x i32> @test66vec(<2 x i64> %x) {
+; CHECK-LABEL: @test66vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], <i64 31, i64 31>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], <i32 40, i32 40>
+; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 2, i32 2>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %1 = and <2 x i64> %x, <i64 4294967296, i64 4294967296>
+ %2 = icmp ne <2 x i64> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
+; Make sure we don't try to optimize a scalar 'and' with a vector select.
+define <2 x i32> @test66vec_scalar_and(i64 %x) {
+; CHECK-LABEL: @test66vec_scalar_and(
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %1 = and i64 %x, 4294967296
+ %2 = icmp ne i64 %1, 0
+ %3 = select i1 %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
define i32 @test67(i16 %x) {
; CHECK-LABEL: @test67(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 %x, 1
ret i32 %3
}
+define <2 x i32> @test67vec(<2 x i16> %x) {
+; CHECK-LABEL: @test67vec(
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 4, i16 4>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i16> [[TMP1]], <i16 1, i16 1>
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], <i16 40, i16 40>
+; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP3]], <i16 2, i16 2>
+; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %1 = and <2 x i16> %x, <i16 4, i16 4>
+ %2 = icmp ne <2 x i16> %1, zeroinitializer
+ %3 = select <2 x i1> %2, <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+ ret <2 x i32> %3
+}
+
define i32 @test68(i32 %x, i32 %y) {
; CHECK-LABEL: @test68(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6