From e930b092c40863aa23ca4ac41565958d9b348140 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 12 Apr 2017 15:11:33 +0000 Subject: [PATCH] [InstCombine] morph an existing instruction instead of creating a new one One potential way to make InstCombine (very slightly?) faster is to recycle instructions when possible instead of creating new ones. It's not explicitly stated AFAIK, but we don't consider this an "InstSimplify". We could, however, make a new layer to house transforms like this if that makes InstCombine more manageable (just throwing out an idea; not sure how much opportunity is actually here). Differential Revision: https://reviews.llvm.org/D31863 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300067 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 13 ++++++------- test/Transforms/InstCombine/select-cmp-br.ll | 4 ++-- test/Transforms/LoopVectorize/if-conversion.ll | 4 ++-- test/Transforms/SimplifyCFG/merge-cond-stores.ll | 4 ++-- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ea8c0a6d38b..99a983ab474 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2408,13 +2408,12 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) { } } - if (Constant *RHS = dyn_cast(Op1)) { - if (RHS->isAllOnesValue() && Op0->hasOneUse()) - // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B - if (CmpInst *CI = dyn_cast(Op0)) - return CmpInst::Create(CI->getOpcode(), - CI->getInversePredicate(), - CI->getOperand(0), CI->getOperand(1)); + // xor (cmp A, B), true = not (cmp A, B) = !cmp A, B + ICmpInst::Predicate Pred; + if (match(Op0, m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))) && + match(Op1, m_AllOnes())) { + cast(Op0)->setPredicate(CmpInst::getInversePredicate(Pred)); + return replaceInstUsesWith(I, Op0); } if (ConstantInt *RHSC = dyn_cast(Op1)) { diff --git a/test/Transforms/InstCombine/select-cmp-br.ll b/test/Transforms/InstCombine/select-cmp-br.ll index e07bfd10899..59384ab7b1f 100644 --- a/test/Transforms/InstCombine/select-cmp-br.ll +++ b/test/Transforms/InstCombine/select-cmp-br.ll @@ -15,8 +15,8 @@ define void @test1(%C* %arg) { ; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0 ; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8 -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null ; CHECK-NEXT: [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]] ; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]] ; CHECK: bb: @@ -115,8 +115,8 @@ define void @test3(%C* %arg) { ; CHECK-NEXT: [[M:%.*]] = load i64*, i64** [[TMP]], align 8 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[C]], %C* [[ARG]], i64 1, i32 0, i32 0 ; CHECK-NEXT: [[N:%.*]] = load i64*, i64** [[TMP1]], align 8 -; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null ; CHECK-NEXT: [[NOT_TMP5:%.*]] = icmp ne i64* [[M]], [[N]] +; CHECK-NEXT: [[TMP71:%.*]] = icmp eq %C* [[ARG]], null ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP71]], [[NOT_TMP5]] ; CHECK-NEXT: br i1 [[TMP7]], label [[BB10:%.*]], label [[BB8:%.*]] ; CHECK: bb: diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll index acf7b12540d..d3a16e2075d 100644 --- a/test/Transforms/LoopVectorize/if-conversion.ll +++ b/test/Transforms/LoopVectorize/if-conversion.ll @@ -18,9 +18,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ;CHECK-LABEL: @function0( ;CHECK: load <4 x i32> +;CHECK: icmp sle <4 x i32> ;CHECK: mul <4 x i32> ;CHECK: add <4 x i32> -;CHECK: icmp sle <4 x i32> ;CHECK: select <4 x i1> ;CHECK: ret i32 define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp { @@ -71,8 +71,8 @@ for.end: ;CHECK-LABEL: @reduction_func( ;CHECK: load <4 x i32> -;CHECK: add <4 x i32> ;CHECK: icmp slt <4 x i32> +;CHECK: add <4 x i32> ;CHECK: select <4 x i1> ;CHECK: ret i32 define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp { diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores.ll b/test/Transforms/SimplifyCFG/merge-cond-stores.ll index baec5244219..d5d0224a4b2 100644 --- a/test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ b/test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -5,8 +5,8 @@ define void @test_simple(i32* %p, i32 %a, i32 %b) { ; CHECK-LABEL: @test_simple( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[A:%.*]], 0 +; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X2]], true ; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] @@ -43,8 +43,8 @@ define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: [[TMP0:%.*]] = or i32 [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: [[X4:%.*]] = icmp eq i32 [[D:%.*]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TMP0]], [[C:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[X4]], true ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[X4]], true ; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[TMP2]] ; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP6:%.*]] ; CHECK: [[X3:%.*]] = icmp eq i32 [[C]], 0 -- 2.40.0