From: Philip Reames Date: Sat, 6 Jul 2019 04:28:00 +0000 (+0000) Subject: [IRBuilder] Fold consistently for or/and whether constant is LHS or RHS X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=531d8c496111c5db1ef893bce9581812e1d238d4;p=llvm [IRBuilder] Fold consistently for or/and whether constant is LHS or RHS Without this, we have the unfortunate property that tests are dependent on the order of operands passed to the CreateOr and CreateAnd functions. In actual usage, we'd promptly optimize them away, but it made tests slightly more verbose than they should have been. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365260 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 980f7345a40..d852c21e787 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -1197,6 +1197,9 @@ public: } Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") { + if (auto *LC = dyn_cast<ConstantInt>(LHS)) + if (LC->isMinusOne()) + return RHS; // -1 & RHS = RHS if (auto *RC = dyn_cast<Constant>(RHS)) { if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isMinusOne()) return LHS; // LHS & -1 -> LHS @@ -1223,6 +1226,9 @@ public: } Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") { + if (auto *LC = dyn_cast<Constant>(LHS)) + if (LC->isNullValue()) + return RHS; // 0 | RHS -> RHS if (auto *RC = dyn_cast<Constant>(RHS)) { if (RC->isNullValue()) return LHS; // LHS | 0 -> LHS diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll index fa6fccecbf1..52c192a8b2e 100644 --- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll +++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll @@ -53,8 +53,6 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]] ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]] -; LV-NEXT: [[Or0:%[^ ]*]] = or 
i1 false, [[PredCheck0]] - ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 @@ -65,7 +63,7 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] -; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]] +; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]] ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph define void @f1(i16* noalias %a, i16* noalias %b, i64 %N) { @@ -147,9 +145,6 @@ for.end: ; preds = %for.body ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]] ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]] - -; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]] - ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 @@ -160,7 +155,7 @@ for.end: ; preds = %for.body ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] -; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]] +; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]] ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph define void @f2(i16* noalias %a, i16* noalias %b, i64 %N) { @@ -227,9 +222,6 @@ for.end: ; preds = %for.body ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]] ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 
[[CheckOr0]], [[OFMulOverflow]] - -; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]] - ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 @@ -240,7 +232,7 @@ for.end: ; preds = %for.body ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] -; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]] +; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]] ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph define void @f3(i16* noalias %a, i16* noalias %b, i64 %N) { @@ -303,9 +295,6 @@ for.end: ; preds = %for.body ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]] ; LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]] - -; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]] - ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 @@ -316,7 +305,7 @@ for.end: ; preds = %for.body ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] -; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]] +; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]] ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph define void @f4(i16* noalias %a, i16* noalias %b, i64 %N) { @@ -381,9 +370,6 @@ for.end: ; preds = %for.body ; LV-NEXT: [[BECheck:%[^ ]*]] = icmp ugt i64 [[BE]], 4294967295 ; LV-NEXT: [[CheckOr0:%[^ ]*]] = or i1 [[Cmp]], [[BECheck]] ; 
LV-NEXT: [[PredCheck0:%[^ ]*]] = or i1 [[CheckOr0]], [[OFMulOverflow]] - -; LV-NEXT: [[Or0:%[^ ]*]] = or i1 false, [[PredCheck0]] - ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 @@ -394,7 +380,7 @@ for.end: ; preds = %for.body ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 true, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] -; LV: [[FinalCheck:%[^ ]*]] = or i1 [[Or0]], [[PredCheck1]] +; LV: [[FinalCheck:%[^ ]*]] = or i1 [[PredCheck0]], [[PredCheck1]] ; LV: br i1 [[FinalCheck]], label %for.body.ph.lver.orig, label %for.body.ph define void @f5(i16* noalias %a, i16* noalias %b, i64 %N) { diff --git a/test/Instrumentation/BoundsChecking/opt.ll b/test/Instrumentation/BoundsChecking/opt.ll index d0bb41f555b..772a5fa4464 100644 --- a/test/Instrumentation/BoundsChecking/opt.ll +++ b/test/Instrumentation/BoundsChecking/opt.ll @@ -50,7 +50,6 @@ for.body.i: ; preds = %for.body.i, %entry ; CHECK: mul i64 {{.*}}, 4 ; CHECK: sub i64 4000, % ; CHECK-NEXT: icmp ult i64 {{.*}}, 4 -; CHECK-NEXT: or i1 ; CHECK: trap %1 = load i32, i32* %arrayidx.i, align 4 %add.i = add nsw i32 %1, %sum.01.i @@ -243,7 +242,6 @@ for.body4: ; preds = %for.body4, %for.con ; CHECK: add i64 ; CHECK: sub i64 16, % ; CHECK-NEXT: icmp ult i64 {{.*}}, 4 -; CHECK-NEXT: or i1 ; CHECK: trap %1 = load i32, i32* %arrayidx7, align 4 %add = add nsw i32 %1, %sum.119 diff --git a/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll index 8ad19cb4707..919202342c9 100644 --- a/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll +++ b/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll @@ -25,18 +25,17 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % 
; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295 ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 false, [[TMP9]] ; CHECK-NEXT: [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]]) ; CHECK-NEXT: [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1 -; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]] -; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]] -; CHECK-NEXT: [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]] -; CHECK-NEXT: [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[A2]], [[MUL_RESULT4]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i64 [[TMP11]], [[A2]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i64 [[TMP10]], [[A2]] +; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW5]] +; CHECK-NEXT: [[TMP16:%.*]] = or i1 [[TMP9]], [[TMP15]] +; CHECK-NEXT: br i1 [[TMP16]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]] ; CHECK: for.body.ph.lver.orig: ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]] ; CHECK: for.body.lver.orig: @@ -70,14 +69,14 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % ; CHECK-NEXT: [[MUL_LDIST1:%.*]] = mul i32 [[IND1_LDIST1]], 2 ; CHECK-NEXT: [[MUL_EXT_LDIST1:%.*]] = zext i32 [[MUL_LDIST1]] to i64 ; CHECK-NEXT: [[ARRAYIDXA_LDIST1:%.*]] = getelementptr inbounds i32, i32* 
[[A]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4 +; CHECK-NEXT: [[LOADA_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXA_LDIST1]], align 4, !alias.scope !0 ; CHECK-NEXT: [[ARRAYIDXB_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[MUL_EXT_LDIST1]] -; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4 +; CHECK-NEXT: [[LOADB_LDIST1:%.*]] = load i32, i32* [[ARRAYIDXB_LDIST1]], align 4, !alias.scope !3 ; CHECK-NEXT: [[MULA_LDIST1:%.*]] = mul i32 [[LOADB_LDIST1]], [[LOADA_LDIST1]] ; CHECK-NEXT: [[ADD_LDIST1]] = add nuw nsw i64 [[IND_LDIST1]], 1 ; CHECK-NEXT: [[INC1_LDIST1]] = add i32 [[IND1_LDIST1]], 1 ; CHECK-NEXT: [[ARRAYIDXA_PLUS_4_LDIST1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[ADD_LDIST1]] -; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4 +; CHECK-NEXT: store i32 [[MULA_LDIST1]], i32* [[ARRAYIDXA_PLUS_4_LDIST1]], align 4, !alias.scope !5 ; CHECK-NEXT: [[EXITCOND_LDIST1:%.*]] = icmp eq i64 [[ADD_LDIST1]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_LDIST1]], label [[FOR_BODY_PH:%.*]], label [[FOR_BODY_LDIST1]] ; CHECK: for.body.ph: @@ -90,12 +89,12 @@ define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias % ; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[IND]], 1 ; CHECK-NEXT: [[INC1]] = add i32 [[IND1]], 1 ; CHECK-NEXT: [[ARRAYIDXD:%.*]] = getelementptr inbounds i32, i32* [[D]], i64 [[MUL_EXT]] -; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4 +; CHECK-NEXT: [[LOADD:%.*]] = load i32, i32* [[ARRAYIDXD]], align 4, !alias.scope !7 ; CHECK-NEXT: [[ARRAYIDXE:%.*]] = getelementptr inbounds i32, i32* [[E]], i64 [[MUL_EXT]] -; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4 +; CHECK-NEXT: [[LOADE:%.*]] = load i32, i32* [[ARRAYIDXE]], align 4, !alias.scope !9 ; CHECK-NEXT: [[MULC:%.*]] = mul i32 [[LOADD]], [[LOADE]] ; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* 
[[C]], i64 [[MUL_EXT]] -; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4 +; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4, !alias.scope !11 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]] ; CHECK: for.end: diff --git a/test/Transforms/LoopPredication/basic.ll b/test/Transforms/LoopPredication/basic.ll index 88fa1bb95b2..52a05ed824a 100644 --- a/test/Transforms/LoopPredication/basic.ll +++ b/test/Transforms/LoopPredication/basic.ll @@ -232,12 +232,11 @@ define i32 @signed_loop_0_to_n_ult_check_length_range_known(i32* %array, i32* %l ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sle i32 [[N]], [[LENGTH]] -; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP1]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP0]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 diff --git a/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll b/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll index 85d4be0d702..588cc017165 100644 --- a/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll +++ b/test/Transforms/LoopPredication/basic_widenable_branch_guards.ll @@ -282,14 +282,13 @@ define i32 @signed_loop_0_to_n_ult_check_length_range_known(i32* %array, i32* %l ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = icmp sle i32 [[N]], [[LENGTH]] -; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[WIDENABLE_COND]] +; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTCALL:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32(i32 9) [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTCALL]] diff --git a/test/Transforms/LoopPredication/invariant_load.ll b/test/Transforms/LoopPredication/invariant_load.ll index ffdc38274ef..83b906f23df 100644 --- a/test/Transforms/LoopPredication/invariant_load.ll +++ b/test/Transforms/LoopPredication/invariant_load.ll @@ -385,14 +385,13 @@ define i32 @constant_length(i32* %array, i32 %n) { ; CHECK-NEXT: br i1 [[TMP5]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], 20 -; CHECK-NEXT: [[TMP1:%.*]] = and i1 true, [[TMP0]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[UNKNOWN:%.*]] = load volatile i1, i1* @UNKNOWN ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[UNKNOWN]]) [ "deopt"() ] -; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP1]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[TMP0]], i32 9) [ "deopt"() ] ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_I:%.*]] = load i32, i32* [[ARRAY_I_PTR]], align 4 diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll index 43c834ed808..da1ce0eef26 100644 --- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll +++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll @@ -71,34 +71,33 @@ define void @foo(i32* nocapture %a, i32* nocapture %b, i32 %k, i32 %m) #0 { ; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[TMP10]], [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 false, i1 [[TMP12]], i1 [[TMP13]] ; CHECK-NEXT: [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP16:%.*]] = or i1 false, [[TMP15]] -; CHECK-NEXT: br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP15]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = trunc i64 [[INDEX]] to i32 -; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[TMP17]], 0 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[ADD_US]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = sext i32 [[TMP19]] to i64 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0 -; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP23]], align 4 -; 
CHECK-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP24]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 0 +; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[ADD_US]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP23]], i32 0 +; CHECK-NEXT: store i32 [[TMP24]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP23]], i32 1 ; CHECK-NEXT: store i32 [[TMP25]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP24]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP23]], i32 2 ; CHECK-NEXT: store i32 [[TMP26]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP24]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP23]], i32 3 ; CHECK-NEXT: store i32 [[TMP27]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[TMP24]], i32 3 -; CHECK-NEXT: store i32 [[TMP28]], i32* [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 +; CHECK-NEXT: 
[[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5 ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]] diff --git a/test/Transforms/LoopVectorize/X86/pr35432.ll b/test/Transforms/LoopVectorize/X86/pr35432.ll index 6aaa13c183a..db5e0ed8da3 100644 --- a/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -40,16 +40,16 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]] +; CHECK-NEXT: [[UMIN:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMIN]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32 ; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]] -; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]] +; CHECK-NEXT: [[UMIN1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMIN1]] ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 ; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]]) ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 @@ -62,48 +62,47 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt 
i32 [[TMP11]], 255 ; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] ; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]] -; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]] -; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[TMP20]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]] ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] -; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP21]], [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP22:%.*]] = trunc i32 [[INDEX]] to i8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP22]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], ; CHECK-NEXT: 
[[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], -; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4 -; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], -; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], -4 +; CHECK-NEXT: [[TMP25]] = add <4 x i32> [[VEC_PHI]], +; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI2]], +; CHECK-NEXT: [[TMP27:%.*]] = add i8 [[TMP23]], -1 ; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1 -; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1 +; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP27]] to i32 ; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32 -; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP26]], [[TMP25]] ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]] ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], 
label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] @@ -114,7 +113,7 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2 ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP32]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: diff --git a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll index d9c9632be04..be4d2f90291 100644 --- a/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll +++ b/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll @@ -33,9 +33,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow -; CHECK: %[[NTEST:[0-9]+]] = or 
i1 false, %[[TEST]] ; CHECK: %ident.check = icmp ne i32 {{.*}}, %{{.*}} -; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check +; CHECK: %{{.*}} = or i1 %[[TEST]], %ident.check ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK: vector.body: ; CHECK: <4 x i32> @@ -92,10 +91,9 @@ for.end: ; CHECK: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK: %[[TEST:[0-9]+]] = or i1 {{.*}}, %mul.overflow -; CHECK: %[[NTEST:[0-9]+]] = or i1 false, %[[TEST]] ; CHECK: %[[EXT:[0-9]+]] = sext i8 {{.*}} to i32 ; CHECK: %ident.check = icmp ne i32 {{.*}}, %[[EXT]] -; CHECK: %{{.*}} = or i1 %[[NTEST]], %ident.check +; CHECK: %{{.*}} = or i1 %[[TEST]], %ident.check ; CHECK-NOT: %mul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 {{.*}}, i8 {{.*}}) ; CHECK: vector.body: ; CHECK: <4 x i32>