[FPEnv] Lower STRICT_FP_EXTEND and STRICT_FP_ROUND nodes in preprocess phase of ISelL...

author Kevin P. Neal <kevin.neal@sas.com>

Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)

committer Kevin P. Neal <kevin.neal@sas.com>

Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)
author Kevin P. Neal <kevin.neal@sas.com>
Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)
committer Kevin P. Neal <kevin.neal@sas.com>
Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp

index 107af0ca3c991eeef0acac6a0ba1210c9128a0db..3e3d6beee3e330ac0389c1ca8205955c7cc61dba 100644 (file)
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -931,59 +931,124 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
      // and the node legalization.  As such this pass basically does "really
      // late" legalization of these inline with the X86 isel pass.
      // FIXME: This should only happen when not compiled with -O0.
-    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
-      continue;
+    switch (N->getOpcode()) {
+    default: continue;
+    case ISD::FP_ROUND:
+    case ISD::FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(0).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
+        continue;
  
-    MVT SrcVT = N->getOperand(0).getSimpleValueType();
-    MVT DstVT = N->getSimpleValueType(0);
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
+        continue;
  
-    // If any of the sources are vectors, no fp stack involved.
-    if (SrcVT.isVector() || DstVT.isVector())
-      continue;
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(1))
+          continue;
+      }
  
-    // If the source and destination are SSE registers, then this is a legal
-    // conversion that should not be lowered.
-    const X86TargetLowering *X86Lowering =
-        static_cast<const X86TargetLowering *>(TLI);
-    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
-    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
-    if (SrcIsSSE && DstIsSSE)
-      continue;
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
+                                          MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+      break;
+    }
  
-    if (!SrcIsSSE && !DstIsSSE) {
-      // If this is an FPStack extension, it is a noop.
-      if (N->getOpcode() == ISD::FP_EXTEND)
+    //The sequence of events for lowering STRICT_FP versions of these nodes requires
+    //dealing with the chain differently, as there is already a preexisting chain.
+    case ISD::STRICT_FP_ROUND:
+    case ISD::STRICT_FP_EXTEND:
+    {
+      MVT SrcVT = N->getOperand(1).getSimpleValueType();
+      MVT DstVT = N->getSimpleValueType(0);
+
+      // If any of the sources are vectors, no fp stack involved.
+      if (SrcVT.isVector() || DstVT.isVector())
          continue;
-      // If this is a value-preserving FPStack truncation, it is a noop.
-      if (N->getConstantOperandVal(1))
+
+      // If the source and destination are SSE registers, then this is a legal
+      // conversion that should not be lowered.
+      const X86TargetLowering *X86Lowering =
+          static_cast<const X86TargetLowering *>(TLI);
+      bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
+      bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
+      if (SrcIsSSE && DstIsSSE)
          continue;
+
+      if (!SrcIsSSE && !DstIsSSE) {
+        // If this is an FPStack extension, it is a noop.
+        if (N->getOpcode() == ISD::STRICT_FP_EXTEND)
+          continue;
+        // If this is a value-preserving FPStack truncation, it is a noop.
+        if (N->getConstantOperandVal(2))
+          continue;
+      }
+
+      // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
+      // FPStack has extload and truncstore.  SSE can fold direct loads into other
+      // operations.  Based on this, decide what we want to do.
+      MVT MemVT;
+      if (N->getOpcode() == ISD::STRICT_FP_ROUND)
+        MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
+      else
+        MemVT = SrcIsSSE ? SrcVT : DstVT;
+
+      SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
+      SDLoc dl(N);
+
+      // FIXME: optimize the case where the src/dest is a load or store?
+
+      //Since the operation is StrictFP, use the preexisting chain.
+      SDValue Store = CurDAG->getTruncStore(N->getOperand(0), dl, N->getOperand(1),
+                                MemTmp, MachinePointerInfo(), MemVT);
+      SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+                                          MachinePointerInfo(), MemVT);
+
+      // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
+      // extload we created.  This will cause general havok on the dag because
+      // anything below the conversion could be folded into other existing nodes.
+      // To avoid invalidating 'I', back it up to the convert node.
+      --I;
+      CurDAG->ReplaceAllUsesWith(N, Result.getNode());
+      break;
+    }
      }
  
-    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
-    // FPStack has extload and truncstore.  SSE can fold direct loads into other
-    // operations.  Based on this, decide what we want to do.
-    MVT MemVT;
-    if (N->getOpcode() == ISD::FP_ROUND)
-      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
-    else
-      MemVT = SrcIsSSE ? SrcVT : DstVT;
-
-    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
-    SDLoc dl(N);
-
-    // FIXME: optimize the case where the src/dest is a load or store?
-    SDValue Store =
-        CurDAG->getTruncStore(CurDAG->getEntryNode(), dl, N->getOperand(0),
-                              MemTmp, MachinePointerInfo(), MemVT);
-    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
-                                        MachinePointerInfo(), MemVT);
-
-    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
-    // extload we created.  This will cause general havok on the dag because
-    // anything below the conversion could be folded into other existing nodes.
-    // To avoid invalidating 'I', back it up to the convert node.
-    --I;
-    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
  
      // Now that we did that, the node is dead.  Increment the iterator to the
      // next node to process, then delete N.
diff --git a/test/CodeGen/X86/constrained-fp80-trunc-ext.ll b/test/CodeGen/X86/constrained-fp80-trunc-ext.ll

new file mode 100644 (file)

index 0000000..ae07f84
--- /dev/null
+++ b/test/CodeGen/X86/constrained-fp80-trunc-ext.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=x86_64-gnu-linux < %s | FileCheck %s
+
+define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) {
+; CHECK-LABEL: constrained_fpext_f32_as_fp80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movss %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    flds -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(
+            float %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstps -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret float %trunc
+}
+
+define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) {
+; CHECK-LABEL: constrained_fpext_f64_to_f80:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movsd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+entry:
+  %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(
+            double %mem,
+            metadata !"fpexcept.strict")
+  ret x86_fp80 %ext
+}
+
+define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) {
+; CHECK-LABEL: constrained_fptrunc_f80_to_f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fstpl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    retq
+  %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80(
+             x86_fp80 %reg,
+             metadata !"round.dynamic",
+             metadata !"fpexcept.strict")
+  ret double %trunc
+}
+
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float, metadata)
+declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80, metadata, metadata)
author	Kevin P. Neal <kevin.neal@sas.com>
	Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)
committer	Kevin P. Neal <kevin.neal@sas.com>
	Fri, 14 Jun 2019 16:28:55 +0000 (16:28 +0000)
lib/Target/X86/X86ISelDAGToDAG.cpp		patch \| blob \| history
test/CodeGen/X86/constrained-fp80-trunc-ext.ll	[new file with mode: 0644]	patch \| blob