[PowerPC] Implement combineRepeatedFPDivisors

author Hal Finkel <hfinkel@anl.gov>

Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)

committer Hal Finkel <hfinkel@anl.gov>

Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)
author Hal Finkel <hfinkel@anl.gov>
Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)
committer Hal Finkel <hfinkel@anl.gov>
Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp

index 351356028d63e5f456a63295382e0c817d8b98c7..7351d19120a1dd07f5f51356445220657fdea049 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7526,6 +7526,28 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
    return SDValue();
  }
  
+bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+  // Note: This functionality is used only when unsafe-fp-math is enabled, and
+  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
+  // enabled for division), this functionality is redundant with the default
+  // combiner logic (once the division -> reciprocal/multiply transformation
+  // has taken place). As a result, this matters more for older cores than for
+  // newer ones.
+
+  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+  // reciprocal if there are two or more FDIVs (for embedded cores with only
+  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
+  switch (Subtarget.getDarwinDirective()) {
+  default:
+    return NumUsers > 2;
+  case PPC::DIR_440:
+  case PPC::DIR_A2:
+  case PPC::DIR_E500mc:
+  case PPC::DIR_E5500:
+    return NumUsers > 1;
+  }
+}
+
  static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                              unsigned Bytes, int Dist,
                              SelectionDAG &DAG) {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h

index bb4d1f1f0197536323fb521f9bd460a93da73e27..4b4d25e3a3ccaffe35ee18c0fd84dfdb0ce41e33 100644 (file)
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -704,6 +704,7 @@ namespace llvm {
                               bool &UseOneConstNR) const override;
      SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
                               unsigned &RefinementSteps) const override;
+    bool combineRepeatedFPDivisors(unsigned NumUsers) const override;
  
      CCAssignFn *useFastISelCCs(unsigned Flag) const;
    };
diff --git a/test/CodeGen/PowerPC/fdiv-combine.ll b/test/CodeGen/PowerPC/fdiv-combine.ll

new file mode 100644 (file)

index 0000000..d3dc3fe
--- /dev/null
+++ b/test/CodeGen/PowerPC/fdiv-combine.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Following test case checks:
+;   a / D; b / D; c / D;
+;                =>
+;   recip = 1.0 / D; a * recip; b * recip; c * recip;
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 { ; three divisions by %D
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 { ; two divisions by %D: expected to stay as fdivs
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3d(double, double, double)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
author	Hal Finkel <hfinkel@anl.gov>
	Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)
committer	Hal Finkel <hfinkel@anl.gov>
	Mon, 24 Nov 2014 23:45:21 +0000 (23:45 +0000)
lib/Target/PowerPC/PPCISelLowering.cpp		patch \| blob \| history
lib/Target/PowerPC/PPCISelLowering.h		patch \| blob \| history
test/CodeGen/PowerPC/fdiv-combine.ll	[new file with mode: 0644]	patch \| blob