From 6eef5df328e31bcb5054d517ffe64e4ccc28b14c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 5 Aug 2017 20:00:41 +0000 Subject: [PATCH] [InstCombine] Support vector splats in foldSelectICmpAnd. Unfortunately, it looks like there are some other missed optimizations in the generated code for some of these cases. I'll try to look at some of those next. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310184 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineSelect.cpp | 38 +++++++----- .../InstCombine/select-with-bitwise-ops.ll | 59 +++++++++++++++++++ 2 files changed, 82 insertions(+), 15 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index b5b8aa33267..92f3c1d9d89 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -597,18 +597,24 @@ canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp, /// icmp instruction with zero, and we have an 'and' with the non-constant value /// and a power of two we can turn the select into a shift on the result of the /// 'and'. -static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC, +static Value *foldSelectICmpAnd(Type *SelType, const ICmpInst *IC, APInt TrueVal, APInt FalseVal, InstCombiner::BuilderTy &Builder) { - if (!IC->isEquality() || !SI.getType()->isIntegerTy()) + assert(SelType->isIntOrIntVectorTy() && "Not an integer select?"); + + // If this is a vector select, we need a vector compare. 
+ if (SelType->isVectorTy() != IC->getType()->isVectorTy()) + return nullptr; + + if (!IC->isEquality()) return nullptr; if (!match(IC->getOperand(1), m_Zero())) return nullptr; - ConstantInt *AndRHS; + const APInt *AndRHS; Value *LHS = IC->getOperand(0); - if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS)))) + if (!match(LHS, m_And(m_Value(), m_Power2(AndRHS)))) return nullptr; // If both select arms are non-zero see if we have a select of the form @@ -628,28 +634,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, const ICmpInst *IC, FalseVal -= Offset; } - // Make sure the mask in the 'and' and one of the select arms is a power of 2. - if (!AndRHS->getValue().isPowerOf2() || - (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2())) + // Make sure one of the select arms is a power of 2. + if (!TrueVal.isPowerOf2() && !FalseVal.isPowerOf2()) return nullptr; // Determine which shift is needed to transform result of the 'and' into the // desired result. const APInt &ValC = !TrueVal.isNullValue() ? TrueVal : FalseVal; unsigned ValZeros = ValC.logBase2(); - unsigned AndZeros = AndRHS->getValue().logBase2(); + unsigned AndZeros = AndRHS->logBase2(); // If types don't match we can still convert the select by introducing a zext // or a trunc of the 'and'. Value *V = LHS; if (ValZeros > AndZeros) { - V = Builder.CreateZExtOrTrunc(V, SI.getType()); + V = Builder.CreateZExtOrTrunc(V, SelType); V = Builder.CreateShl(V, ValZeros - AndZeros); } else if (ValZeros < AndZeros) { V = Builder.CreateLShr(V, AndZeros - ValZeros); - V = Builder.CreateZExtOrTrunc(V, SI.getType()); + V = Builder.CreateZExtOrTrunc(V, SelType); } else - V = Builder.CreateZExtOrTrunc(V, SI.getType()); + V = Builder.CreateZExtOrTrunc(V, SelType); // Okay, now we know that everything is set up, we just don't know whether we // have a icmp_ne or icmp_eq and whether the true or false val is the zero. 
@@ -670,11 +675,14 @@ Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI, Value *TrueVal = SI.getTrueValue(); Value *FalseVal = SI.getFalseValue(); - if (ConstantInt *TrueValC = dyn_cast<ConstantInt>(TrueVal)) - if (ConstantInt *FalseValC = dyn_cast<ConstantInt>(FalseVal)) - if (Value *V = foldSelectICmpAnd(SI, ICI, TrueValC->getValue(), - FalseValC->getValue(), Builder)) + { + const APInt *TrueValC, *FalseValC; + if (match(TrueVal, m_APInt(TrueValC)) && + match(FalseVal, m_APInt(FalseValC))) + if (Value *V = foldSelectICmpAnd(SI.getType(), ICI, *TrueValC, + *FalseValC, Builder)) return replaceInstUsesWith(SI, V); + } if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, Builder)) return NewSel; diff --git a/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/test/Transforms/InstCombine/select-with-bitwise-ops.ll index c92a749f724..7580fadfd89 100644 --- a/test/Transforms/InstCombine/select-with-bitwise-ops.ll +++ b/test/Transforms/InstCombine/select-with-bitwise-ops.ll @@ -268,6 +268,21 @@ define i32 @test65(i64 %x) { ret i32 %3 } +define <2 x i32> @test65vec(<2 x i64> %x) { +; CHECK-LABEL: @test65vec( +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %1 = and <2 x i64> %x, + %2 = icmp ne <2 x i64> %1, zeroinitializer + %3 = select <2 x i1> %2, <2 x i32> , <2 x i32> + ret <2 x i32> %3 +} + define i32 @test66(i64 %x) { ; CHECK-LABEL: @test66( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[X:%.*]], 31 @@ -282,6 +297,35 @@ define i32 @test66(i64 %x) { ret i32 %3 } +define <2 x i32> @test66vec(<2 x i64> %x) { +; CHECK-LABEL: @test66vec( +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i64> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x 
i64> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %1 = and <2 x i64> %x, + %2 = icmp ne <2 x i64> %1, zeroinitializer + %3 = select <2 x i1> %2, <2 x i32> , <2 x i32> + ret <2 x i32> %3 +} + +; Make sure we don't try to optimize a scalar 'and' with a vector select. +define <2 x i32> @test66vec_scalar_and(i64 %x) { +; CHECK-LABEL: @test66vec_scalar_and( +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], <2 x i32> , <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %1 = and i64 %x, 4294967296 + %2 = icmp ne i64 %1, 0 + %3 = select i1 %2, <2 x i32> , <2 x i32> + ret <2 x i32> %3 +} + define i32 @test67(i16 %x) { ; CHECK-LABEL: @test67( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 %x, 1 @@ -296,6 +340,21 @@ define i32 @test67(i16 %x) { ret i32 %3 } +define <2 x i32> @test67vec(<2 x i16> %x) { +; CHECK-LABEL: @test67vec( +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = lshr exact <2 x i16> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i16> [[TMP3]], +; CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; + %1 = and <2 x i16> %x, + %2 = icmp ne <2 x i16> %1, zeroinitializer + %3 = select <2 x i1> %2, <2 x i32> , <2 x i32> + ret <2 x i32> %3 +} + define i32 @test68(i32 %x, i32 %y) { ; CHECK-LABEL: @test68( ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 6 -- 2.50.1