virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
return nullptr;
}
+
+ /// Returns true if we should normalize
+ /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+ /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely
+ /// that it saves us from materializing N0 and N1 in an integer register.
+ /// Targets that are able to perform and/or on flags should return false here.
+ virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
+ EVT VT) const {
+ // If a target has multiple condition registers, then it likely has logical
+ // operations on those registers.
+ if (hasMultipleConditionRegisters())
+ return false;
+ // Only do the transform if the value won't be split into multiple
+ // registers.
+ LegalizeTypeAction Action = getTypeAction(Context, VT);
+ return Action != TypeExpandInteger && Action != TypeExpandFloat &&
+ Action != TypeSplitVector;
+ }
+
//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
return SimplifySelect(SDLoc(N), N0, N1, N2);
}
+ if (VT0 == MVT::i1) {
+ if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ // select (and Cond0, Cond1), X, Y
+ // -> select Cond0, (select Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+ InnerSelect, N2);
+ }
+ // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+ N1.getValueType(), Cond1, N1, N2);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+ InnerSelect);
+ }
+ }
+
+ // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+ if (N1->getOpcode() == ISD::SELECT) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ if (N1_2 == N2) {
+ // Create the actual and node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+ N0, N1_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+ N1_1, N2);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1_1, N2);
+ }
+ }
+ // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+ if (N2->getOpcode() == ISD::SELECT) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ if (N2_1 == N1) {
+ // Create the actual or node if we can generate good code for it.
+ if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+ SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+ N0, N2_0);
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+ N1, N2_2);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N))
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+ N1, N2_2);
+ }
+ }
+ }
+
return SDValue();
}
--- /dev/null
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with and i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using and on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orss
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
%a = load float, float addrspace(1)* %in0
%b = load float, float addrspace(1)* %in1
%acmp = fcmp oge float %a, 0.000000e+00
%bcmp = fcmp oge float %b, 0.000000e+00
%or = or i1 %acmp, %bcmp
- %result = select i1 %or, float %a, float %b
- store float %result, float addrspace(1)* %out
+ %result = zext i1 %or to i32
+ store i32 %result, i32 addrspace(1)* %out
ret void
}
--- /dev/null
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = and i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with and i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using and on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = fcmp olt float %a2, %a3
+ %and = or i1 %cmp0, %cmp1
+ %res = select i1 %and, i32 %a4, i32 %a5
+ ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+ %cmp0 = icmp ult i32 %a0, %a1
+ %cmp1 = icmp ult i32 %a1, %a2
+ %or = or i1 %cmp0, %cmp1
+ %zero_one = zext i1 %or to i32
+ store volatile i32 %zero_one, i32* @var32
+ %res = select i1 %or, i32 %a3, i32 %a4
+ ret i32 %res
+}
; PR13475
; If we have sub a, b and cmp b, a and the result of cmp is used
; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
; CHECK-LABEL: func_q:
; CHECK: cmp
; CHECK-NEXT: sbb
- %tmp532 = add i32 %j.4, %w
- %tmp533 = icmp ugt i32 %tmp532, %el
- %tmp534 = icmp ult i32 %w, %el
- %or.cond = and i1 %tmp533, %tmp534
- %tmp535 = sub i32 %el, %w
- %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
- ret i32 %j.5
+ %1 = icmp ult i32 %a0, %a1
+ %2 = sub i32 %a1, %a0
+ %3 = select i1 %1, i32 -1, i32 0
+ %4 = xor i32 %2, %3
+ ret i32 %4
}
; rdar://11873276
define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
%tmp12 = add i64 %tmp11, 5089792279245435153
; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
+; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK-NOT: [[REGISTER_zext]]
-; CHECK-DAG: testl %e[[REGISTER_zext]]
-; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]]
+; CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]]
%tmp13 = sub i64 %tmp12, 2138875574
%tmp14 = zext i32 %tmp4 to i64