[DAGCombine] Transform (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)).

author Chad Rosier <mcrosier@codeaurora.org>

Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)

committer Chad Rosier <mcrosier@codeaurora.org>

Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)
author Chad Rosier <mcrosier@codeaurora.org>
Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)
committer Chad Rosier <mcrosier@codeaurora.org>
Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 03698ac862afc50a4cab58a9171e659e3896074e..c2be9f3f058bb4005dcfcf73d4343cb395d6eeaa 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9469,6 +9469,14 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
    return SDValue();
  }
  
+static bool isFMulNegTwo(SDValue &N) { // True iff N is an FMUL whose operand 1 is the constant -2.0.
+  if (N.getOpcode() != ISD::FMUL) // Any other opcode cannot match.
+    return false;
+  if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1))) // Only operand 1 is inspected, never operand 0.
+    return CFP->isExactlyValue(-2.0); // Exact match against -2.0 only.
+  return false; // Helper returned null for operand 1, so there is no constant to compare.
+}
+
  SDValue DAGCombiner::visitFADD(SDNode *N) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
@@ -9507,6 +9515,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
      return DAG.getNode(ISD::FSUB, DL, VT, N1,
                         GetNegatedExpression(N0, DAG, LegalOperations), Flags);
  
+  // fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
+  // fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
+  if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
+      (isFMulNegTwo(N1) && N1.hasOneUse())) {
+    bool N1IsFMul = isFMulNegTwo(N1);
+    SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
+    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
+    return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
+  }
+
    // FIXME: Auto-upgrade the target/function-level option.
    if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
      // fold (fadd A, 0) -> A
diff --git a/test/CodeGen/AArch64/fadd-combines.ll b/test/CodeGen/AArch64/fadd-combines.ll

new file mode 100644 (file)

index 0000000..c106f29
--- /dev/null
+++ b/test/CodeGen/AArch64/fadd-combines.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: test1:
+; CHECK: fadd d1, d1, d1
+; CHECK: fsub d0, d0, d1
+define double @test1(double %a, double %b) local_unnamed_addr #0 {
+entry:
+  %mul = fmul double %b, -2.000000e+00
+  %add1 = fadd double %a, %mul
+  ret double %add1
+}
+
+; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b'
+; CHECK-LABEL: test2:
+; CHECK: fadd d1, d1, d1
+; CHECK: fsub d0, d0, d1
+define double @test2(double %a, double %b) local_unnamed_addr #0 {
+entry:
+  %mul = fmul double %b, 2.000000e+00
+  %add1 = fsub double %a, %mul
+  ret double %add1
+}
+
+; CHECK-LABEL: test3:
+; CHECK: fmul d0, d0, d1
+; CHECK: fadd d1, d2, d2
+; CHECK: fsub d0, d0, d1
+define double @test3(double %a, double %b, double %c) local_unnamed_addr #0 {
+entry:
+  %mul = fmul double %a, %b
+  %mul1 = fmul double %c, 2.000000e+00
+  %sub = fsub double %mul, %mul1
+  ret double %sub
+}
+
+; CHECK-LABEL: test4:
+; CHECK: fmul d0, d0, d1
+; CHECK: fadd d1, d2, d2
+; CHECK: fsub d0, d0, d1
+define double @test4(double %a, double %b, double %c) local_unnamed_addr #0 {
+entry:
+  %mul = fmul double %a, %b
+  %mul1 = fmul double %c, -2.000000e+00
+  %add2 = fadd double %mul, %mul1
+  ret double %add2
+}
+
+; CHECK-LABEL: test5:
+; CHECK: fadd v1.4s, v1.4s, v1.4s
+; CHECK: fsub v0.4s, v0.4s, v1.4s
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
+  %mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
+  %add = fadd <4 x float> %a, %mul
+  ret <4 x float> %add
+}
+
+; CHECK-LABEL: test6:
+; CHECK: fadd v1.4s, v1.4s, v1.4s
+; CHECK: fsub v0.4s, v0.4s, v1.4s
+define <4 x float> @test6(<4 x float> %a, <4 x float> %b) {
+  %mul = fmul <4 x float> %b, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %add = fsub <4 x float> %a, %mul
+  ret <4 x float> %add
+}
+
+; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has
+; multiple uses.
+; CHECK-LABEL: test7:
+; CHECK: fmul
+define double @test7(double %a, double %b) local_unnamed_addr #0 {
+entry:
+  %mul = fmul double %b, -2.000000e+00
+  %add1 = fadd double %a, %mul
+  call void @use(double %mul)
+  ret double %add1
+}
+
+declare void @use(double)
diff --git a/test/CodeGen/AMDGPU/fmuladd.f32.ll b/test/CodeGen/AMDGPU/fmuladd.f32.ll

index fb605dd2e4bd480a40c59b6229f33ae42d687374..e422550266924e542de644927d256573f0f9c416 100644 (file)
--- a/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -191,8 +191,8 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
  
  ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
  
-; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
+; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
  
  ; SI-DENORM: buffer_store_dword [[RESULT]]
  ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
@@ -251,8 +251,8 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
  
  ; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
  
-; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
-; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
+; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
+; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
  
  ; SI-DENORM: buffer_store_dword [[RESULT]]
  ; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
author	Chad Rosier <mcrosier@codeaurora.org>
	Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)
committer	Chad Rosier <mcrosier@codeaurora.org>
	Thu, 4 May 2017 14:14:44 +0000 (14:14 +0000)
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch | blob | history
test/CodeGen/AArch64/fadd-combines.ll	[new file with mode: 0644]	patch | blob
test/CodeGen/AMDGPU/fmuladd.f32.ll		patch | blob | history