From 4dc104b36bf1d8bea65e8d06980a44bb42ab02d3 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Aug 2017 18:10:39 +0000 Subject: [PATCH] [InstCombine] Support (X | C1) & C2 --> (X & C2^(C1&C2)) | (C1&C2) for vector splats Note that the original code I deleted incorrectly listed this as (X | C1) & C2 --> (X & C2^(C1&C2)) | C1, which is only valid if C1 is a subset of C2. This relied on SimplifyDemandedBits to remove any extra bits from C1 before we got to that code. My new implementation avoids relying on that behavior so that it can be naively verified with Alive. Differential Revision: https://reviews.llvm.org/D36384 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310272 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineAndOrXor.cpp | 31 ++++++++++--------- test/Transforms/InstCombine/or.ll | 31 +++++++++++++++++++ 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 1699a0ce89c..db8e6ecb0bf 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -126,21 +126,6 @@ Instruction *InstCombiner::OptAndOp(BinaryOperator *Op, switch (Op->getOpcode()) { default: break; - case Instruction::Or: - if (Op->hasOneUse()){ - ConstantInt *TogetherCI = dyn_cast<ConstantInt>(Together); - if (TogetherCI && !TogetherCI->isZero()){ - // (X | C1) & C2 --> (X & (C2^(C1&C2))) | C1 - // NOTE: This reduces the number of bits set in the & mask, which - // can expose opportunities for store narrowing. 
- Together = ConstantExpr::getXor(AndRHS, Together); - Value *And = Builder.CreateAnd(X, Together); - And->takeName(Op); - return BinaryOperator::CreateOr(And, OpRHS); - } - } - - break; case Instruction::Add: if (Op->hasOneUse()) { // Adding a one to a single bit bit-field should be turned into an XOR @@ -1223,6 +1208,22 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return BinaryOperator::CreateXor(And, NewC); } + const APInt *OrC; + if (match(Op0, m_OneUse(m_Or(m_Value(X), m_APInt(OrC))))) { + // (X | C1) & C2 --> (X & (C2^(C1&C2))) | (C1&C2) + // NOTE: This reduces the number of bits set in the & mask, which + // can expose opportunities for store narrowing for scalars. + // NOTE: SimplifyDemandedBits should have already removed bits from C1 + // that aren't set in C2, meaning we could replace (C1&C2) with C1 in the + // formula above, but computing it explicitly feels safer. + APInt Together = *C & *OrC; + Value *And = Builder.CreateAnd(X, ConstantInt::get(I.getType(), + Together ^ *C)); + And->takeName(Op0); + return BinaryOperator::CreateOr(And, ConstantInt::get(I.getType(), + Together)); + } + // If the mask is only needed on one incoming arm, push the 'and' op up. 
if (match(Op0, m_OneUse(m_Xor(m_Value(X), m_Value(Y)))) || match(Op0, m_OneUse(m_Or(m_Value(X), m_Value(Y))))) { diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll index fb56449ba4d..6ae52d45426 100644 --- a/test/Transforms/InstCombine/or.ll +++ b/test/Transforms/InstCombine/or.ll @@ -268,6 +268,21 @@ define i32 @test30(i32 %A) { ret i32 %E } +define <2 x i32> @test30vec(<2 x i32> %A) { +; CHECK-LABEL: @test30vec( +; CHECK-NEXT: [[C:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i32> [[A]], +; CHECK-NEXT: [[D:%.*]] = or <2 x i32> [[B]], +; CHECK-NEXT: [[E:%.*]] = or <2 x i32> [[D]], [[C]] +; CHECK-NEXT: ret <2 x i32> [[E]] +; + %B = or <2 x i32> %A, + %C = and <2 x i32> %A, + %D = and <2 x i32> %B, + %E = or <2 x i32> %D, %C + ret <2 x i32> %E +} + ; PR4216 define i64 @test31(i64 %A) { ; CHECK-LABEL: @test31( @@ -285,6 +300,22 @@ define i64 @test31(i64 %A) { ret i64 %F } +define <2 x i64> @test31vec(<2 x i64> %A) { +; CHECK-LABEL: @test31vec( +; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[A:%.*]], +; CHECK-NEXT: [[F:%.*]] = or <2 x i64> [[E]], +; CHECK-NEXT: ret <2 x i64> [[F]] +; + %B = or <2 x i64> %A, + %D = and <2 x i64> %B, + + %C = or <2 x i64> %A, + %E = and <2 x i64> %C, + + %F = or <2 x i64> %D, %E + ret <2 x i64> %F +} + ; codegen is mature enough to handle vector selects. define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191) { ; CHECK-LABEL: @test32( -- 2.50.1