if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
// ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
// of X and OP behaves well when given trunc(C1) and X.
+ // TODO: Do this for vectors by using m_APInt isntead of m_ConstantInt.
switch (Op0I->getOpcode()) {
default:
break;
case Instruction::Sub:
Value *X;
ConstantInt *C1;
- if (match(Op0I, m_c_BinOp(m_ZExt(m_Value(X)), m_ConstantInt(C1)))) {
+ // TODO: The one use restrictions could be relaxed a little if the AND
+ // is going to be removed.
+ if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
+ m_ConstantInt(C1))))) {
if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
Value *BinOp;
; CHECK-NEXT: [[TMP3:%.*]] = zext i64 [[TMP2]] to i128
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i128 [[TMP3]], 11795372955171141389
; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP4]], 64
-; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP2]], -6651371118538410227
-; CHECK-NEXT: [[DOTMASKED:%.*]] = zext i64 [[TMP6]] to i128
-; CHECK-NEXT: [[TMP7:%.*]] = xor i128 [[TMP5]], [[DOTMASKED]]
-; CHECK-NEXT: [[TMP8:%.*]] = mul nuw nsw i128 [[TMP7]], 1946526487930394057
-; CHECK-NEXT: [[TMP9:%.*]] = lshr i128 [[TMP8]], 64
-; CHECK-NEXT: [[TMP10:%.*]] = xor i128 [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[TMP11:%.*]] = trunc i128 [[TMP10]] to i64
-; CHECK-NEXT: ret i64 [[TMP11]]
+; CHECK-NEXT: [[DOTMASKED:%.*]] = and i128 [[TMP4]], 18446744073709551615
+; CHECK-NEXT: [[TMP6:%.*]] = xor i128 [[TMP5]], [[DOTMASKED]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw i128 [[TMP6]], 1946526487930394057
+; CHECK-NEXT: [[TMP8:%.*]] = lshr i128 [[TMP7]], 64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i128 [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc i128 [[TMP9]] to i64
+; CHECK-NEXT: ret i64 [[TMP10]]
;
%1 = load i64, i64* @wyhash64_x, align 8
%2 = add i64 %1, 6971258582664805397
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
-; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP2]], 3
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP4]] to i64
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nuw nsw i64 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]], !dbg !9
; CHECK-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]], !dbg !9
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[N]], 3, !dbg !9
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = zext i32 [[TMP6]] to i64, !dbg !9
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[N_MOD_VF]], !dbg !9
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588, !dbg !9
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg !9
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg !9
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]], !dbg !9
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !dbg !9, !llvm.loop !15
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]], !dbg !9
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]