DAGCombiner: Canonicalize select(and/or,x,y) depending on target.

author Matthias Braun <matze@braunis.de>

Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)

committer Matthias Braun <matze@braunis.de>

Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)
author Matthias Braun <matze@braunis.de>
Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)
committer Matthias Braun <matze@braunis.de>
Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h

index a56fdf9f2223aecf8757103897a2435489fd208b..85b6e954dcd42dfdc9881d3c9dbb54d2538d2355 100644 (file)
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1097,6 +1097,25 @@ public:
    virtual LoadInst *lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
      return nullptr;
    }
+
+  /// Returns true if we should normalize
+  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
+  /// that it saves us from materializing N0 and N1 in an integer register.
+  /// Targets that are able to perform and/or on flags should return false here.
+  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
+                                               EVT VT) const {
+    // If a target has multiple condition registers, then it likely has logical
+    // operations on those registers.
+    if (hasMultipleConditionRegisters())
+      return false;
+    // Only do the transform if the value won't be split into multiple
+    // registers.
+    LegalizeTypeAction Action = getTypeAction(Context, VT);
+    return Action != TypeExpandInteger && Action != TypeExpandFloat &&
+      Action != TypeSplitVector;
+  }
+
    //===--------------------------------------------------------------------===//
    // TargetLowering Configuration Methods - These methods should be invoked by
    // the derived class constructor to configure this object for the target.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index e247961a7baa1999e58faebeb2702c10d1cbcd4b..64228a1aa9b9ec1b7e87496fba69694d26580278 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4819,6 +4819,69 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
      return SimplifySelect(SDLoc(N), N0, N1, N2);
    }
  
+  if (VT0 == MVT::i1) {
+    if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+      // select (and Cond0, Cond1), X, Y
+      //   -> select Cond0, (select Cond1, X, Y), Y
+      if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
+                           InnerSelect, N2);
+      }
+      // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
+      if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+        SDValue Cond0 = N0->getOperand(0);
+        SDValue Cond1 = N0->getOperand(1);
+        SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
+                                          N1.getValueType(), Cond1, N1, N2);
+        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
+                           InnerSelect);
+      }
+    }
+
+    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
+    if (N1->getOpcode() == ISD::SELECT) {
+      SDValue N1_0 = N1->getOperand(0);
+      SDValue N1_1 = N1->getOperand(1);
+      SDValue N1_2 = N1->getOperand(2);
+      if (N1_2 == N2) {
+        // Create the actual and node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
+                                    N0, N1_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
+                             N1_1, N2);
+        }
+        // Otherwise see if we can optimize the "and" to a better pattern.
+        if (SDValue Combined = visitANDLike(N0, N1_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1_1, N2);
+      }
+    }
+    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
+    if (N2->getOpcode() == ISD::SELECT) {
+      SDValue N2_0 = N2->getOperand(0);
+      SDValue N2_1 = N2->getOperand(1);
+      SDValue N2_2 = N2->getOperand(2);
+      if (N2_1 == N1) {
+        // Create the actual or node if we can generate good code for it.
+        if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
+          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
+                                   N0, N2_0);
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
+                             N1, N2_2);
+        }
+        // Otherwise see if we can optimize to a better pattern.
+        if (SDValue Combined = visitORLike(N0, N2_0, N))
+          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
+                             N1, N2_2);
+      }
+    }
+  }
+
    return SDValue();
  }
  
diff --git a/test/CodeGen/ARM/movcc-double.ll b/test/CodeGen/ARM/movcc-double.ll

new file mode 100644 (file)

index 0000000..9ce708d
--- /dev/null
+++ b/test/CodeGen/ARM/movcc-double.ll
@@ -0,0 +1,50 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "arm-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: tst
+; CHECK-NOT: movne
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: orrs
+; CHECK-NOT: tst
+; CHECK: mov{{lo|hs}}
+; CHECK: mov{{lo|hs}}
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a2, %a3
+  %and = or i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: orrs
+; CHECK: movne
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll

index 1b1cb9a83cb44ca546e63347c03583dd95214214..1337adb7b453a1e951a48bfa68087ca1b9820a8d 100644 (file)
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -156,14 +156,14 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
  ; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
  
  ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
-define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
+define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
    %a = load float, float addrspace(1)* %in0
    %b = load float, float addrspace(1)* %in1
    %acmp = fcmp oge float %a, 0.000000e+00
    %bcmp = fcmp oge float %b, 0.000000e+00
    %or = or i1 %acmp, %bcmp
-  %result = select i1 %or, float %a, float %b
-  store float %result, float addrspace(1)* %out
+  %result = zext i1 %or to i32
+  store i32 %result, i32 addrspace(1)* %out
    ret void
  }
  
diff --git a/test/CodeGen/X86/cmov-double.ll b/test/CodeGen/X86/cmov-double.ll

new file mode 100644 (file)

index 0000000..994a027
--- /dev/null
+++ b/test/CodeGen/X86/cmov-double.ll
@@ -0,0 +1,52 @@
+; RUN: llc -o - %s | FileCheck %s
+target triple = "x86_64-unknown-unknown"
+
+; select with and i1/or i1 condition should be implemented as a series of 2
+; cmovs, not by producing two conditions and using and on them.
+
+define i32 @select_and(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; CHECK-LABEL: select_and
+; CHECK-NOT: set
+; CHECK-NOT: and[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = and i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+define i32 @select_or(i32 %a0, i32 %a1, float %a2, float %a3, i32 %a4, i32 %a5) {
+; select with or i1 condition should be implemented as a series of 2 cmovs, not
+; by producing two conditions and using or on them.
+; CHECK-LABEL: select_or
+; CHECK-NOT: set
+; CHECK-NOT: or[lb]
+; CHECK-NOT: test
+; CHECK: cmov
+; CHECK: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = fcmp olt float %a2, %a3
+  %and = or i1 %cmp0, %cmp1
+  %res = select i1 %and, i32 %a4, i32 %a5
+  ret i32 %res
+}
+
+; If one of the conditions is materialized as a 0/1 value anyway, then the
+; sequence of 2 cmovs should not be used.
+
+@var32 = global i32 0
+define i32 @select_noopt(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
+; CHECK-LABEL: select_noopt
+; CHECK: cmov
+; CHECK-NOT: cmov
+  %cmp0 = icmp ult i32 %a0, %a1
+  %cmp1 = icmp ult i32 %a1, %a2
+  %or = or i1 %cmp0, %cmp1
+  %zero_one = zext i1 %or to i32
+  store volatile i32 %zero_one, i32* @var32
+  %res = select i1 %or, i32 %a3, i32 %a4
+  ret i32 %res
+}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll

index 440f1cc9b49fc8d380989623d04dd79de0d24bbf..31a7af31790bb488bbc9dcda19e9b4379273398f 100644 (file)
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -217,17 +217,15 @@ entry:
  ; PR13475
  ; If we have sub a, b and cmp b, a and the result of cmp is used
  ; by sbb, we should not optimize cmp away.
-define i32 @func_q(i32 %j.4, i32 %w, i32 %el) {
+define i32 @func_q(i32 %a0, i32 %a1, i32 %a2) {
  ; CHECK-LABEL: func_q:
  ; CHECK: cmp
  ; CHECK-NEXT: sbb
-  %tmp532 = add i32 %j.4, %w
-  %tmp533 = icmp ugt i32 %tmp532, %el
-  %tmp534 = icmp ult i32 %w, %el
-  %or.cond = and i1 %tmp533, %tmp534
-  %tmp535 = sub i32 %el, %w
-  %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4
-  ret i32 %j.5
+  %1 = icmp ult i32 %a0, %a1
+  %2 = sub i32 %a1, %a0
+  %3 = select i1 %1, i32 -1, i32 0
+  %4 = xor i32 %2, %3
+  ret i32 %4
  }
  ; rdar://11873276
  define i8* @func_r(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll

index 2758bff8024254f73fb09b62e0f8c6d061d05693..01f871159d3b3b92c3505817420ca8e13c8c0322 100644 (file)
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -34,11 +34,12 @@ entry:
    %tmp12 = add i64 %tmp11, 5089792279245435153
  
  ; CHECK:      addl     $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
-; CHECK:      movslq   %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
  ; CHECK:      cmpl     $-8608074, %e[[REGISTER_zext]]
+; CHECK:      movslq   %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
  ; CHECK-NOT:  [[REGISTER_zext]]
-; CHECK-DAG:  testl     %e[[REGISTER_zext]]
-; CHECK:      subq     %r[[REGISTER_zext]], [[REGISTER_sext]]
+; CHECK-DAG:  cmpl     $2138875573, %e[[REGISTER_zext]]
+; CHECK:      movq  [[REGISTER_sext]], [[REGISTER_sext2:%[a-z0-9]+]]
+; CHECK:      subq     %r[[REGISTER_zext]], [[REGISTER_sext2]]
  
    %tmp13 = sub i64 %tmp12, 2138875574
    %tmp14 = zext i32 %tmp4 to i64
author	Matthias Braun <matze@braunis.de>
	Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)
committer	Matthias Braun <matze@braunis.de>
	Fri, 6 Mar 2015 19:49:10 +0000 (19:49 +0000)
include/llvm/Target/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
test/CodeGen/ARM/movcc-double.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/R600/or.ll		patch \| blob \| history
test/CodeGen/X86/cmov-double.ll	[new file with mode: 0644]	patch \| blob
test/CodeGen/X86/jump_sign.ll		patch \| blob \| history
test/CodeGen/X86/zext-sext.ll		patch \| blob \| history