return nullptr;
}
-Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
+Value *InstCombiner::foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS,
+ BinaryOperator &I) {
+ assert(I.getOpcode() == Instruction::Xor && I.getOperand(0) == LHS &&
+ I.getOperand(1) == RHS && "Should be 'xor' with these operands");
+
if (predicatesFoldable(LHS->getPredicate(), RHS->getPredicate())) {
if (LHS->getOperand(0) == RHS->getOperand(1) &&
LHS->getOperand(1) == RHS->getOperand(0))
// TODO: If OrICmp is false, the whole thing is false (InstSimplify?).
if (Value *AndICmp = SimplifyBinOp(Instruction::And, LHS, RHS, SQ)) {
// TODO: Independently handle cases where the 'and' side is a constant.
- if (OrICmp == LHS && AndICmp == RHS && RHS->hasOneUse()) {
- // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS
- RHS->setPredicate(RHS->getInversePredicate());
- return Builder.CreateAnd(LHS, RHS);
+ ICmpInst *X = nullptr, *Y = nullptr;
+ if (OrICmp == LHS && AndICmp == RHS) {
+ // (LHS | RHS) & !(LHS & RHS) --> LHS & !RHS --> X & !Y
+ X = LHS;
+ Y = RHS;
}
- if (OrICmp == RHS && AndICmp == LHS && LHS->hasOneUse()) {
- // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS
- LHS->setPredicate(LHS->getInversePredicate());
+ if (OrICmp == RHS && AndICmp == LHS) {
+ // !(LHS & RHS) & (LHS | RHS) --> !LHS & RHS --> !Y & X
+ X = RHS;
+ Y = LHS;
+ }
+ if (X && Y && (Y->hasOneUse() || canFreelyInvertAllUsersOf(Y, &I))) {
+ // Invert the predicate of 'Y', thus inverting its output.
+ Y->setPredicate(Y->getInversePredicate());
+ // So, are there other uses of Y?
+ if (!Y->hasOneUse()) {
+ // We need to adapt other uses of Y though. Get a value that matches
+ // the original value of Y before inversion. While this increases
+ // immediate instruction count, we have just ensured that all the
+ // users are freely-invertible, so that 'not' *will* get folded away.
+ BuilderTy::InsertPointGuard Guard(Builder);
+ // Set insertion point to right after the Y.
+ Builder.SetInsertPoint(Y->getParent(), ++(Y->getIterator()));
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ // Replace all uses of Y (excluding the one in NotY!) with NotY.
+ Y->replaceUsesWithIf(NotY,
+ [NotY](Use &U) { return U.getUser() != NotY; });
+ }
+ // All done.
return Builder.CreateAnd(LHS, RHS);
}
}
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
- if (Value *V = foldXorOfICmps(LHS, RHS))
+ if (Value *V = foldXorOfICmps(LHS, RHS, I))
return replaceInstUsesWith(I, V);
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
/// This happens in cases where the ~ can be eliminated. If WillInvertAllUses
/// is true, work under the assumption that the caller intends to remove all
/// uses of V and only keep uses of ~V.
+///
+/// See also: canFreelyInvertAllUsersOf()
static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
  // ~(~(X)) -> X. Inverting a 'not' is free: the two inversions cancel out,
  // so the answer is 'true' here, not 'false'.
  if (match(V, m_Not(m_Value())))
    return true;

  // Conservative default: we cannot prove this value is free to invert.
  // (Also ensures all control paths return a value.)
  return false;
}
+/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
+///
+/// See also: IsFreeToInvert()
+static inline bool canFreelyInvertAllUsersOf(Value *V, Value *IgnoredUser) {
+  // Look at every user of V.
+  for (User *U : V->users()) {
+    if (U == IgnoredUser)
+      continue; // Don't consider this user.
+
+    auto *I = cast<Instruction>(U);
+    switch (I->getOpcode()) {
+    case Instruction::Select:
+      // A select is only freely invertible if V is its *condition*; if the
+      // i1 value V is itself one of the selected values, swapping the
+      // true/false arms does not compensate for inverting V.
+      if (U->getOperand(1) == V || U->getOperand(2) == V)
+        return false; // Select is not invertible via arm-swapping here.
+      break; // Free to invert by swapping true/false values.
+    case Instruction::Br:
+      break; // Free to invert by swapping true/false destinations.
+    case Instruction::Xor: // Can invert 'xor' if it's a 'not', by ignoring it.
+      if (!match(I, m_Not(m_Value())))
+        return false; // Not a 'not'.
+      break;
+    default:
+      return false; // Don't know, likely not freely invertible.
+    }
+    // So far all users were free to invert...
+  }
+  return true; // Can freely invert all users!
+}
+
/// Some binary operators require special handling to avoid poison and undefined
/// behavior. If a constant vector has undef elements, replace those undefs with
/// identity constants if possible because those are always safe to execute.
Value *foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Instruction &CxtI);
- Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS);
+ Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &I);
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
/// NOTE: Unlike most of instcombine, this returns a Value which should
define i32 @t4_select_cond_xor_v0(i32 %X) {
; CHECK-LABEL: @t4_select_cond_xor_v0(
-; CHECK-NEXT: [[NEED_TO_CLAMP_POSITIVE:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[DONT_NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_POSITIVE]], i32 32767, i32 -32768
-; CHECK-NEXT: [[DONT_NEED_TO_CLAMP:%.*]] = xor i1 [[NEED_TO_CLAMP_POSITIVE]], [[DONT_NEED_TO_CLAMP_NEGATIVE]]
-; CHECK-NEXT: [[R:%.*]] = select i1 [[DONT_NEED_TO_CLAMP]], i32 [[X]], i32 [[CLAMP_LIMIT]]
+; CHECK-NEXT: [[NEED_TO_CLAMP_POSITIVE:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_POSITIVE]], i32 -32768, i32 32767
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[CLAMP_LIMIT]]
; CHECK-NEXT: ret i32 [[R]]
;
%need_to_clamp_positive = icmp sgt i32 %X, 32767
define i32 @t5_select_cond_xor_v2(i32 %X) {
; CHECK-LABEL: @t5_select_cond_xor_v2(
-; CHECK-NEXT: [[DONT_NEED_TO_CLAMP_POSITIVE:%.*]] = icmp slt i32 [[X:%.*]], 32768
-; CHECK-NEXT: [[NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp slt i32 [[X]], -32767
-; CHECK-NEXT: [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_NEGATIVE]], i32 -32768, i32 32767
-; CHECK-NEXT: [[DONT_NEED_TO_CLAMP:%.*]] = xor i1 [[DONT_NEED_TO_CLAMP_POSITIVE]], [[NEED_TO_CLAMP_NEGATIVE]]
-; CHECK-NEXT: [[R:%.*]] = select i1 [[DONT_NEED_TO_CLAMP]], i32 [[X]], i32 [[CLAMP_LIMIT]]
+; CHECK-NEXT: [[NEED_TO_CLAMP_NEGATIVE:%.*]] = icmp sgt i32 [[X:%.*]], -32768
+; CHECK-NEXT: [[CLAMP_LIMIT:%.*]] = select i1 [[NEED_TO_CLAMP_NEGATIVE]], i32 32767, i32 -32768
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 [[CLAMP_LIMIT]]
; CHECK-NEXT: ret i32 [[R]]
;
%dont_need_to_clamp_positive = icmp sle i32 %X, 32767
; %cond0 is extra-used in select, which is freely invertible.
define i1 @v0_select_of_consts(i32 %X, i32* %selected) {
; CHECK-LABEL: @v0_select_of_consts(
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 -32768
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 -32768, i32 32767
; CHECK-NEXT: store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%cond0 = icmp sgt i32 %X, 32767
%cond1 = icmp sgt i32 %X, -32768
}
define i1 @v1_select_of_var_and_const(i32 %X, i32 %Y, i32* %selected) {
; CHECK-LABEL: @v1_select_of_var_and_const(
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 -32768
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 -32768, i32 [[Y:%.*]]
; CHECK-NEXT: store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%cond0 = icmp sgt i32 %X, 32767
%cond1 = icmp sgt i32 %X, -32768
}
define i1 @v2_select_of_const_and_var(i32 %X, i32 %Y, i32* %selected) {
; CHECK-LABEL: @v2_select_of_const_and_var(
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 [[Y:%.*]]
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 32767
; CHECK-NEXT: store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%cond0 = icmp sgt i32 %X, 32767
%cond1 = icmp sgt i32 %X, -32768
define i1 @v3_branch(i32 %X, i32* %dst0, i32* %dst1) {
; CHECK-LABEL: @v3_branch(
; CHECK-NEXT: begin:
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: br i1 [[COND0]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: br i1 [[COND0]], label [[BB1:%.*]], label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: store i32 0, i32* [[DST0:%.*]], align 4
; CHECK-NEXT: br label [[END:%.*]]
; CHECK-NEXT: store i32 0, i32* [[DST1:%.*]], align 4
; CHECK-NEXT: br label [[END]]
; CHECK: end:
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP0]]
;
begin:
%cond0 = icmp sgt i32 %X, 32767
; Can invert 'not'.
define i1 @v4_not_store(i32 %X, i1* %not_cond) {
; CHECK-LABEL: @v4_not_store(
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[NOT_COND0:%.*]] = xor i1 [[COND0]], true
-; CHECK-NEXT: store i1 [[NOT_COND0]], i1* [[NOT_COND:%.*]], align 1
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%cond0 = icmp sgt i32 %X, 32767
%not_cond0 = xor i1 %cond0, -1
; All extra uses are invertible.
define i1 @v5_select_and_not(i32 %X, i32 %Y, i32* %selected, i1* %not_cond) {
; CHECK-LABEL: @v5_select_and_not(
-; CHECK-NEXT: [[COND0:%.*]] = icmp sgt i32 [[X:%.*]], 32767
-; CHECK-NEXT: [[COND1:%.*]] = icmp sgt i32 [[X]], -32768
-; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 32767, i32 [[Y:%.*]]
-; CHECK-NEXT: [[NOT_COND0:%.*]] = xor i1 [[COND0]], true
-; CHECK-NEXT: store i1 [[NOT_COND0]], i1* [[NOT_COND:%.*]], align 1
+; CHECK-NEXT: [[COND0:%.*]] = icmp slt i32 [[X:%.*]], 32768
+; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND0]], i32 [[Y:%.*]], i32 32767
+; CHECK-NEXT: store i1 [[COND0]], i1* [[NOT_COND:%.*]], align 1
; CHECK-NEXT: store i32 [[SELECT]], i32* [[SELECTED:%.*]], align 4
-; CHECK-NEXT: [[RES:%.*]] = xor i1 [[COND0]], [[COND1]]
-; CHECK-NEXT: ret i1 [[RES]]
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 [[X]], 32767
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 65535
+; CHECK-NEXT: ret i1 [[TMP1]]
;
%cond0 = icmp sgt i32 %X, 32767
%cond1 = icmp sgt i32 %X, -32768