From 47941aa098fe064276632fef1867581effe0dd26 Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 6 Mar 2015 19:49:10 +0000 Subject: [PATCH] DAGCombiner: Canonicalize select(and/or,x,y) depending on target. This is based on the following equivalences: select(C0 & C1, X, Y) <=> select(C0, select(C1, X, Y), Y) select(C0 | C1, X, Y) <=> select(C0, X, select(C1, X, Y)) Many targets cannot perform and/or on the CPU flags and therefore the right side should be chosen to avoid materializing the i1 flags in an integer register. If the target can perform this operation efficiently we normalize to the left form. Differential Revision: http://reviews.llvm.org/D7622 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231507 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 19 +++++++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 63 ++++++++++++++++++++++++ test/CodeGen/ARM/movcc-double.ll | 50 +++++++++++++++++++ test/CodeGen/R600/or.ll | 6 +-- test/CodeGen/X86/cmov-double.ll | 52 +++++++++++++++++++ test/CodeGen/X86/jump_sign.ll | 14 +++--- test/CodeGen/X86/zext-sext.ll | 7 +-- 7 files changed, 197 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/ARM/movcc-double.ll create mode 100644 test/CodeGen/X86/cmov-double.ll diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index a56fdf9f222..85b6e954dcd 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -1097,6 +1097,25 @@ public: virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { return nullptr; } + + /// Returns true if we should normalize + /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and + /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely + /// that it saves us from materializing N0 and N1 in an integer register. + /// Targets that are able to perform and/or on flags should return false here. 
+ virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, + EVT VT) const { + // If a target has multiple condition registers, then it likely has logical + // operations on those registers. + if (hasMultipleConditionRegisters()) + return false; + // Only do the transform if the value won't be split into multiple + // registers. + LegalizeTypeAction Action = getTypeAction(Context, VT); + return Action != TypeExpandInteger && Action != TypeExpandFloat && + Action != TypeSplitVector; + } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e247961a7ba..64228a1aa9b 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4819,6 +4819,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } + if (VT0 == MVT::i1) { + if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + } + + // select Cond0, 
(select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2) { + // Create the actual and node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1) { + // Create the actual or node if we can generate good code for it. + if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + return SDValue(); } diff --git a/test/CodeGen/ARM/movcc-double.ll b/test/CodeGen/ARM/movcc-double.ll new file mode 100644 index 00000000000..9ce708d9bd3 --- /dev/null +++ b/test/CodeGen/ARM/movcc-double.ll @@ -0,0 +1,50 @@ +; RUN: llc -o - %s | FileCheck %s +target triple = "arm-unknown-unknown" + +; select with and i1/or i1 condition should be implemented as a series of 2 +; cmovs, not by producing two conditions and using and on them. 
+ +define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { +; CHECK-LABEL: select_and +; CHECK-NOT: tst +; CHECK-NOT: movne +; CHECK: mov{{lo|hs}} +; CHECK: mov{{lo|hs}} + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = icmp ult i32 %a2, %a3 + %and = and i1 %cmp0, %cmp1 + %res = select i1 %and, i32 %a4, i32 %a5 + ret i32 %res +} + +define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { +; select with or i1 condition should be implemented as a series of 2 cmovs, not +; by producing two conditions and using or on them. +; CHECK-LABEL: select_or +; CHECK-NOT: orss +; CHECK-NOT: tst +; CHECK: mov{{lo|hs}} +; CHECK: mov{{lo|hs}} + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = icmp ult i32 %a2, %a3 + %and = or i1 %cmp0, %cmp1 + %res = select i1 %and, i32 %a4, i32 %a5 + ret i32 %res +} + +; If one of the conditions is materialized as a 0/1 value anyway, then the +; sequence of 2 cmovs should not be used. + +@var32 = global i32 0 +define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +; CHECK-LABEL: select_noopt +; CHECK: orrs +; CHECK: movne + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = icmp ult i32 %a1, %a2 + %or = or i1 %cmp0, %cmp1 + %zero_one = zext i1 %or to i32 + store volatile i32 %zero_one, i32* @var32 + %res = select i1 %or, i32 %a3, i32 %a4 + ret i32 %res +} diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll index 1b1cb9a83cb..1337adb7b45 100644 --- a/test/CodeGen/R600/or.ll +++ b/test/CodeGen/R600/or.ll @@ -156,14 +156,14 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] -define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { +define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { %a = load float, float addrspace(1)* %in0 %b = load float, float 
addrspace(1)* %in1 %acmp = fcmp oge float %a, 0.000000e+00 %bcmp = fcmp oge float %b, 0.000000e+00 %or = or i1 %acmp, %bcmp - %result = select i1 %or, float %a, float %b - store float %result, float addrspace(1)* %out + %result = zext i1 %or to i32 + store i32 %result, i32 addrspace(1)* %out ret void } diff --git a/test/CodeGen/X86/cmov-double.ll b/test/CodeGen/X86/cmov-double.ll new file mode 100644 index 00000000000..994a027596c --- /dev/null +++ b/test/CodeGen/X86/cmov-double.ll @@ -0,0 +1,52 @@ +; RUN: llc -o - %s | FileCheck %s +target triple = "x86_64-unknown-unknown" + +; select with and i1/or i1 condition should be implemented as a series of 2 +; cmovs, not by producing two conditions and using and on them. + +define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) { +; CHECK-LABEL: select_and +; CHECK-NOT: set +; CHECK-NOT: and[lb] +; CHECK-NOT: test +; CHECK: cmov +; CHECK: cmov + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = fcmp olt float %a2, %a3 + %and = and i1 %cmp0, %cmp1 + %res = select i1 %and, i32 %a4, i32 %a5 + ret i32 %res +} + +define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) { +; select with or i1 condition should be implemented as a series of 2 cmovs, not +; by producing two conditions and using or on them. +; CHECK-LABEL: select_or +; CHECK-NOT: set +; CHECK-NOT: or[lb] +; CHECK-NOT: test +; CHECK: cmov +; CHECK: cmov + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = fcmp olt float %a2, %a3 + %and = or i1 %cmp0, %cmp1 + %res = select i1 %and, i32 %a4, i32 %a5 + ret i32 %res +} + +; If one of the conditions is materialized as a 0/1 value anyway, then the +; sequence of 2 cmovs should not be used. 
+ +@var32 = global i32 0 +define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) { +; CHECK-LABEL: select_noopt +; CHECK: cmov +; CHECK-NOT: cmov + %cmp0 = icmp ult i32 %a0, %a1 + %cmp1 = icmp ult i32 %a1, %a2 + %or = or i1 %cmp0, %cmp1 + %zero_one = zext i1 %or to i32 + store volatile i32 %zero_one, i32* @var32 + %res = select i1 %or, i32 %a3, i32 %a4 + ret i32 %res +} diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 440f1cc9b49..31a7af31790 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -217,17 +217,15 @@ entry: ; PR13475 ; If we have sub a, b and cmp b, a and the result of cmp is used ; by sbb, we should not optimize cmp away. -define i32 @func_q(i32 %j.4, i32 %w, i32 %el) { +define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) { ; CHECK-LABEL: func_q: ; CHECK: cmp ; CHECK-NEXT: sbb - %tmp532 = add i32 %j.4, %w - %tmp533 = icmp ugt i32 %tmp532, %el - %tmp534 = icmp ult i32 %w, %el - %or.cond = and i1 %tmp533, %tmp534 - %tmp535 = sub i32 %el, %w - %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4 - ret i32 %j.5 + %1 = icmp ult i32 %a0, %a1 + %2 = sub i32 %a1, %a0 + %3 = select i1 %1, i32 -1, i32 0 + %4 = xor i32 %2, %3 + ret i32 %4 } ; rdar://11873276 define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind { diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll index 2758bff8024..01f871159d3 100644 --- a/test/CodeGen/X86/zext-sext.ll +++ b/test/CodeGen/X86/zext-sext.ll @@ -34,11 +34,12 @@ entry: %tmp12 = add i64 %tmp11, 5089792279245435153 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]] -; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]] ; CHECK: cmpl $-8608074, %e[[REGISTER_zext]] +; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]] ; CHECK-NOT: [[REGISTER_zext]] -; CHECK-DAG: testl %e[[REGISTER_zext]] -; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]] +; CHECK-DAG: cmpl $2138875573, %e[[REGISTER_zext]] +; 
CHECK: movq [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]] +; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext2]] %tmp13 = sub i64 %tmp12, 2138875574 %tmp14 = zext i32 %tmp4 to i64 -- 2.40.0