[DAG][X86] Convert isNegatibleForFree/GetNegatedExpression to a target hook (PR42863)

author Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h

index 608692fcb57cca3b9b2a4c148ab0052175eb6a0f..74763f9ac816164d5c0f633a6d55256af448bc01 100644 (file)
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -3365,6 +3365,18 @@ public:
      llvm_unreachable("Not Implemented");
    }
  
+  /// Return 1 if we can compute the negated form of the specified expression
+  /// for the same cost as the expression itself, or 2 if we can compute the
+  /// negated form more cheaply than the expression itself. Else return 0.
+  virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+                                  bool LegalOperations, bool ForCodeSize,
+                                  unsigned Depth = 0) const;
+
+  /// If isNegatibleForFree returns true, return the newly negated expression.
+  virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                       bool LegalOperations, bool ForCodeSize,
+                                       unsigned Depth = 0) const;
+
    //===--------------------------------------------------------------------===//
    // Lowering methods - These methods must be implemented by targets so that
    // the SelectionDAGBuilder code knows how to lower these.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

index 2b47407393ddd1520377fe7a826c57d048a11b76..9b678b22b46cfd8033b5b9ec94ce6cbbcb7370f4 100644 (file)
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -785,252 +785,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
    DAG.DeleteNode(N);
  }
  
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
-                               const TargetLowering &TLI,
-                               const TargetOptions *Options,
-                               bool ForCodeSize,
-                               unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return 2;
-
-  // Don't allow anything with multiple uses unless we know it is free.
-  EVT VT = Op.getValueType();
-  const SDNodeFlags Flags = Op->getFlags();
-  if (!Op.hasOneUse() &&
-      !(Op.getOpcode() == ISD::FP_EXTEND &&
-        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
-    return 0;
-
-  // Don't recurse exponentially.
-  if (Depth > SelectionDAG::MaxRecursionDepth)
-    return 0;
-
-  switch (Op.getOpcode()) {
-  default: return false;
-  case ISD::ConstantFP: {
-    if (!LegalOperations)
-      return 1;
-
-    // Don't invert constant FP values after legalization unless the target says
-    // the negated constant is legal.
-    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
-                            ForCodeSize);
-  }
-  case ISD::BUILD_VECTOR: {
-    // Only permit BUILD_VECTOR of constants.
-    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
-          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
-        }))
-      return 0;
-    if (!LegalOperations)
-      return 1;
-    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
-        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
-      return 1;
-    return llvm::all_of(Op->op_values(), [&](SDValue N) {
-      return N.isUndef() ||
-             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
-                              ForCodeSize);
-    });
-  }
-  case ISD::FADD:
-    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // After operation legalization, it might not be legal to create new FSUBs.
-    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
-      return 0;
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  case ISD::FSUB:
-    // We can't turn -(A-B) into B-A when we honor signed zeros.
-    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return 1;
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-
-    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
-    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
-      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
-        return 0;
-
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-
-  case ISD::FMA:
-  case ISD::FMAD: {
-    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
-    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
-    char V2 = isNegatibleForFree(Op.getOperand(2), LegalOperations, TLI,
-                                 Options, ForCodeSize, Depth + 1);
-    if (!V2)
-      return 0;
-
-    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
-    char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                 Options, ForCodeSize, Depth + 1);
-    char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI,
-                                 Options, ForCodeSize, Depth + 1);
-    char V01 = std::max(V0, V1);
-    return V01 ? std::max(V01, V2) : 0;
-  }
-
-  case ISD::FP_EXTEND:
-  case ISD::FP_ROUND:
-  case ISD::FSIN:
-    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
-                                    bool LegalOperations, bool ForCodeSize,
-                                    unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return Op.getOperand(0);
-
-  assert(Depth <= SelectionDAG::MaxRecursionDepth &&
-         "GetNegatedExpression doesn't match isNegatibleForFree");
-  const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags Flags = Op->getFlags();
-
-  switch (Op.getOpcode()) {
-  default: llvm_unreachable("Unknown code");
-  case ISD::ConstantFP: {
-    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
-    V.changeSign();
-    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
-  }
-  case ISD::BUILD_VECTOR: {
-    SmallVector<SDValue, 4> Ops;
-    for (SDValue C : Op->op_values()) {
-      if (C.isUndef()) {
-        Ops.push_back(C);
-        continue;
-      }
-      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
-      V.changeSign();
-      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
-    }
-    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
-  }
-  case ISD::FADD:
-    assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
-           "Expected NSZ fp-flag");
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(0), Flags);
-  case ISD::FSUB:
-    // fold (fneg (fsub 0, B)) -> B
-    if (ConstantFPSDNode *N0CFP =
-            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
-      if (N0CFP->isZero())
-        return Op.getOperand(1);
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(1), Op.getOperand(0), Flags);
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-
-    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(0),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1), Flags);
-
-  case ISD::FMA:
-  case ISD::FMAD: {
-    assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
-           "Expected NSZ fp-flag");
-
-    SDValue Neg2 = GetNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
-                                        ForCodeSize, Depth + 1);
-
-    char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                                 DAG.getTargetLoweringInfo(), &Options,
-                                 ForCodeSize, Depth + 1);
-    char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations,
-                                 DAG.getTargetLoweringInfo(), &Options,
-                                 ForCodeSize, Depth + 1);
-    if (V0 >= V1) {
-      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
-      SDValue Neg0 = GetNegatedExpression(
-          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
-      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
-                         Op.getOperand(1), Neg2, Flags);
-    }
-
-    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
-    SDValue Neg1 = GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
-                                        ForCodeSize, Depth + 1);
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(0), Neg1, Neg2, Flags);
-  }
-
-  case ISD::FP_EXTEND:
-  case ISD::FSIN:
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1));
-  case ISD::FP_ROUND:
-    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(1));
-  }
-}
-
  // APInts must be the same size for most operations, this helper
  // function zero extends the shorter of the pair so that they match.
  // We provide an Offset so that we can create bitwidths that won't overflow.
@@ -12052,17 +11806,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
  
    // fold (fadd A, (fneg B)) -> (fsub A, B)
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
-    return DAG.getNode(ISD::FSUB, DL, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations,
-                                            ForCodeSize), Flags);
+      TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
+    return DAG.getNode(
+        ISD::FSUB, DL, VT, N0,
+        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
  
    // fold (fadd (fneg A), B) -> (fsub B, A)
    if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
-      isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
-    return DAG.getNode(ISD::FSUB, DL, VT, N1,
-                       GetNegatedExpression(N0, DAG, LegalOperations,
-                                            ForCodeSize), Flags);
+      TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
+    return DAG.getNode(
+        ISD::FSUB, DL, VT, N1,
+        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
  
    auto isFMulNegTwo = [](SDValue FMul) {
      if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12241,16 +11995,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    if (N0CFP && N0CFP->isZero()) {
      if (N0CFP->isNegative() ||
          (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
-      if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
-        return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+      if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+        return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
        if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
          return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
      }
    }
  
    if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
-      (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
-      && N1.getOpcode() == ISD::FADD) {
+       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+      N1.getOpcode() == ISD::FADD) {
      // X - (X + Y) -> -Y
      if (N0 == N1->getOperand(0))
        return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
@@ -12260,10 +12014,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
    }
  
    // fold (fsub A, (fneg B)) -> (fadd A, B)
-  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
-    return DAG.getNode(ISD::FADD, DL, VT, N0,
-                       GetNegatedExpression(N1, DAG, LegalOperations,
-                                            ForCodeSize), Flags);
+  if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+    return DAG.getNode(
+        ISD::FADD, DL, VT, N0,
+        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
  
    // FSUB -> FMA combines:
    if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12277,11 +12031,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
  /// Return true if both inputs are at least as cheap in negated form and at
  /// least one input is strictly cheaper in negated form.
  bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
-  const TargetOptions &Options = DAG.getTarget().Options;
-  if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
-                                   ForCodeSize))
-    if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
-                                         ForCodeSize))
+  if (char LHSNeg =
+          TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
+    if (char RHSNeg =
+            TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
        // Both negated operands are at least as cheap as their counterparts.
        // Check to see if at least one is cheaper negated.
        if (LHSNeg == 2 || RHSNeg == 2)
@@ -12362,8 +12115,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
  
    // -N0 * -N1 --> N0 * N1
    if (isCheaperToUseNegatedFPOps(N0, N1)) {
-    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
-    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN0 =
+        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 =
+        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
    }
  
@@ -12445,8 +12200,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
  
    // (-N0 * -N1) + N2 --> (N0 * N1) + N2
    if (isCheaperToUseNegatedFPOps(N0, N1)) {
-    SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
-    SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN0 =
+        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+    SDValue NegN1 =
+        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
      return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
    }
  
@@ -12707,8 +12464,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
    if (isCheaperToUseNegatedFPOps(N0, N1))
      return DAG.getNode(
          ISD::FDIV, SDLoc(N), VT,
-        GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
-        GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
+        TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
+        TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
  
    return SDValue();
  }
@@ -13262,9 +13019,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
    if (isConstantFPBuildVectorOrConstantFP(N0))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
  
-  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
-                         &DAG.getTarget().Options, ForCodeSize))
-    return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+  if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+    return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
  
    // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
    // constant pool values.
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index fbe4e8cca8eac5aa08634866704a5a775e4f0601..0b6f8ba4257cafa58859311febf5579f133b8c9b 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5331,6 +5331,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
    return false;
  }
  
+char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+                                        bool LegalOperations, bool ForCodeSize,
+                                        unsigned Depth) const {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG)
+    return 2;
+
+  // Don't allow anything with multiple uses unless we know it is free.
+  EVT VT = Op.getValueType();
+  const SDNodeFlags Flags = Op->getFlags();
+  const TargetOptions &Options = DAG.getTarget().Options;
+  if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
+                           isFPExtFree(VT, Op.getOperand(0).getValueType())))
+    return 0;
+
+  // Don't recurse exponentially.
+  if (Depth > SelectionDAG::MaxRecursionDepth)
+    return 0;
+
+  switch (Op.getOpcode()) {
+  case ISD::ConstantFP: {
+    if (!LegalOperations)
+      return 1;
+
+    // Don't invert constant FP values after legalization unless the target says
+    // the negated constant is legal.
+    return isOperationLegal(ISD::ConstantFP, VT) ||
+           isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+                        ForCodeSize);
+  }
+  case ISD::BUILD_VECTOR: {
+    // Only permit BUILD_VECTOR of constants.
+    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+        }))
+      return 0;
+    if (!LegalOperations)
+      return 1;
+    if (isOperationLegal(ISD::ConstantFP, VT) &&
+        isOperationLegal(ISD::BUILD_VECTOR, VT))
+      return 1;
+    return llvm::all_of(Op->op_values(), [&](SDValue N) {
+      return N.isUndef() ||
+             isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+                          ForCodeSize);
+    });
+  }
+  case ISD::FADD:
+    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+      return 0;
+
+    // After operation legalization, it might not be legal to create new FSUBs.
+    if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
+      return 0;
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+                                    ForCodeSize, Depth + 1))
+      return V;
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+                              ForCodeSize, Depth + 1);
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+      return 0;
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return 1;
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+                                    ForCodeSize, Depth + 1))
+      return V;
+
+    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+        return 0;
+
+    return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+                              ForCodeSize, Depth + 1);
+
+  case ISD::FMA:
+  case ISD::FMAD: {
+    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+      return 0;
+
+    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+    char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
+                                 ForCodeSize, Depth + 1);
+    if (!V2)
+      return 0;
+
+    // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
+    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+                                 ForCodeSize, Depth + 1);
+    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+                                 ForCodeSize, Depth + 1);
+    char V01 = std::max(V0, V1);
+    return V01 ? std::max(V01, V2) : 0;
+  }
+
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FSIN:
+    return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+                              ForCodeSize, Depth + 1);
+  }
+
+  return 0;
+}
+
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                             bool LegalOperations,
+                                             bool ForCodeSize,
+                                             unsigned Depth) const {
+  // fneg is removable even if it has multiple uses.
+  if (Op.getOpcode() == ISD::FNEG)
+    return Op.getOperand(0);
+
+  assert(Depth <= SelectionDAG::MaxRecursionDepth &&
+         "getNegatedExpression doesn't match isNegatibleForFree");
+  const SDNodeFlags Flags = Op->getFlags();
+
+  switch (Op.getOpcode()) {
+  case ISD::ConstantFP: {
+    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+    V.changeSign();
+    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+  }
+  case ISD::BUILD_VECTOR: {
+    SmallVector<SDValue, 4> Ops;
+    for (SDValue C : Op->op_values()) {
+      if (C.isUndef()) {
+        Ops.push_back(C);
+        continue;
+      }
+      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+      V.changeSign();
+      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+    }
+    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+  }
+  case ISD::FADD:
+    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+            Flags.hasNoSignedZeros()) &&
+           "Expected NSZ fp-flag");
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+                           Depth + 1))
+      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+                         getNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, ForCodeSize,
+                                              Depth + 1),
+                         Op.getOperand(1), Flags);
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+                       getNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1),
+                       Op.getOperand(0), Flags);
+  case ISD::FSUB:
+    // fold (fneg (fsub 0, B)) -> B
+    if (ConstantFPSDNode *N0CFP =
+            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
+      if (N0CFP->isZero())
+        return Op.getOperand(1);
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(0), Flags);
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+    if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+                           Depth + 1))
+      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+                         getNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, ForCodeSize,
+                                              Depth + 1),
+                         Op.getOperand(1), Flags);
+
+    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+    return DAG.getNode(
+        Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+        getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+                             ForCodeSize, Depth + 1),
+        Flags);
+
+  case ISD::FMA:
+  case ISD::FMAD: {
+    assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+            Flags.hasNoSignedZeros()) &&
+           "Expected NSZ fp-flag");
+
+    SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
+                                        ForCodeSize, Depth + 1);
+
+    char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+                                 ForCodeSize, Depth + 1);
+    char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+                                 ForCodeSize, Depth + 1);
+    if (V0 >= V1) {
+      // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+      SDValue Neg0 = getNegatedExpression(
+          Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
+      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
+                         Op.getOperand(1), Neg2, Flags);
+    }
+
+    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+    SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+                                        ForCodeSize, Depth + 1);
+    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(0), Neg1, Neg2, Flags);
+  }
+
+  case ISD::FP_EXTEND:
+  case ISD::FSIN:
+    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+                       getNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1));
+  case ISD::FP_ROUND:
+    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+                       getNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, ForCodeSize,
+                                            Depth + 1),
+                       Op.getOperand(1));
+  }
+
+  llvm_unreachable("Unknown code");
+}
+
  //===----------------------------------------------------------------------===//
  // Legalization Utilities
  //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index a9f15ed70a7ce67d9c8fb038106a6bacf9c6c46f..8965a9d1582d1d0ec3d14930fa51a71513b88a38 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -42038,6 +42038,101 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
    return SDValue();
  }
  
+char X86TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+                                           bool LegalOperations,
+                                           bool ForCodeSize,
+                                           unsigned Depth) const {
+  // fneg patterns are removable even if they have multiple uses.
+  if (isFNEG(DAG, Op.getNode()))
+    return 2;
+
+  // Don't recurse exponentially.
+  if (Depth > SelectionDAG::MaxRecursionDepth)
+    return 0;
+
+  EVT VT = Op.getValueType();
+  EVT SVT = VT.getScalarType();
+  switch (Op.getOpcode()) {
+  case ISD::FMA:
+  case X86ISD::FMSUB:
+  case X86ISD::FNMADD:
+  case X86ISD::FNMSUB:
+  case X86ISD::FMADD_RND:
+  case X86ISD::FMSUB_RND:
+  case X86ISD::FNMADD_RND:
+  case X86ISD::FNMSUB_RND: {
+    if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
+        !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
+      break;
+
+    // This is always negatible for free but we might be able to remove some
+    // extra operand negations as well.
+    for (int i = 0; i != 3; ++i) {
+      char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
+                                  ForCodeSize, Depth + 1);
+      if (V == 2)
+        return V;
+    }
+    return 1;
+  }
+  }
+
+  return TargetLowering::isNegatibleForFree(Op, DAG, LegalOperations,
+                                            ForCodeSize, Depth);
+}
+
+SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                                bool LegalOperations,
+                                                bool ForCodeSize,
+                                                unsigned Depth) const {
+  // fneg patterns are removable even if they have multiple uses.
+  if (SDValue Arg = isFNEG(DAG, Op.getNode()))
+    return DAG.getBitcast(Op.getValueType(), Arg);
+
+  EVT VT = Op.getValueType();
+  EVT SVT = VT.getScalarType();
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  case ISD::FMA:
+  case X86ISD::FMSUB:
+  case X86ISD::FNMADD:
+  case X86ISD::FNMSUB:
+  case X86ISD::FMADD_RND:
+  case X86ISD::FMSUB_RND:
+  case X86ISD::FNMADD_RND:
+  case X86ISD::FNMSUB_RND: {
+    if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
+        !(SVT == MVT::f32 || SVT == MVT::f64) || !LegalOperations)
+      break;
+
+    // This is always negatible for free but we might be able to remove some
+    // extra operand negations as well.
+    SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
+    for (int i = 0; i != 3; ++i) {
+      char V = isNegatibleForFree(Op.getOperand(i), DAG, LegalOperations,
+                                  ForCodeSize, Depth + 1);
+      if (V == 2)
+        NewOps[i] = getNegatedExpression(Op.getOperand(i), DAG, LegalOperations,
+                                         ForCodeSize, Depth + 1);
+    }
+
+    bool NegA = !!NewOps[0];
+    bool NegB = !!NewOps[1];
+    bool NegC = !!NewOps[2];
+    unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);
+
+    // Fill in the non-negated ops with the original values.
+    for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
+      if (!NewOps[i])
+        NewOps[i] = Op.getOperand(i);
+    return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
+  }
+  }
+
+  return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
+                                              ForCodeSize, Depth);
+}
+
  static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
    MVT VT = N->getSimpleValueType(0);
@@ -42967,12 +43062,14 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
  }
  
  static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
+                          TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
    SDLoc dl(N);
    EVT VT = N->getValueType(0);
  
    // Let legalize expand this if it isn't a legal type yet.
-  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(VT))
      return SDValue();
  
    EVT ScalarVT = VT.getScalarType();
@@ -42983,17 +43080,21 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
    SDValue B = N->getOperand(1);
    SDValue C = N->getOperand(2);
  
-  auto invertIfNegative = [&DAG](SDValue &V) {
-    if (SDValue NegVal = isFNEG(DAG, V.getNode())) {
-      V = DAG.getBitcast(V.getValueType(), NegVal);
+  auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
+    bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+    bool LegalOperations = !DCI.isBeforeLegalizeOps();
+    if (TLI.isNegatibleForFree(V, DAG, LegalOperations, CodeSize) == 2) {
+      V = TLI.getNegatedExpression(V, DAG, LegalOperations, CodeSize);
        return true;
      }
      // Look through extract_vector_elts. If it comes from an FNEG, create a
      // new extract from the FNEG input.
      if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          isNullConstant(V.getOperand(1))) {
-      if (SDValue NegVal = isFNEG(DAG, V.getOperand(0).getNode())) {
-        NegVal = DAG.getBitcast(V.getOperand(0).getValueType(), NegVal);
+      SDValue Vec = V.getOperand(0);
+      if (TLI.isNegatibleForFree(Vec, DAG, LegalOperations, CodeSize) == 2) {
+        SDValue NegVal =
+            TLI.getNegatedExpression(Vec, DAG, LegalOperations, CodeSize);
          V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
                          NegVal, V.getOperand(1));
          return true;
@@ -43023,25 +43124,25 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
  // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C)
  // Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C)
  static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
-                               const X86Subtarget &Subtarget) {
+                               TargetLowering::DAGCombinerInfo &DCI) {
    SDLoc dl(N);
    EVT VT = N->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
+  bool LegalOperations = !DCI.isBeforeLegalizeOps();
  
-  SDValue NegVal = isFNEG(DAG, N->getOperand(2).getNode());
-  if (!NegVal)
-    return SDValue();
-
-  // FIXME: Should we bitcast instead?
-  if (NegVal.getValueType() != VT)
+  SDValue N2 = N->getOperand(2);
+  if (!TLI.isNegatibleForFree(N2, DAG, LegalOperations, CodeSize))
      return SDValue();
  
+  SDValue NegN2 = TLI.getNegatedExpression(N2, DAG, LegalOperations, CodeSize);
    unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);
  
    if (N->getNumOperands() == 4)
      return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
-                       NegVal, N->getOperand(3));
+                       NegN2, N->getOperand(3));
    return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
-                     NegVal);
+                     NegN2);
  }
  
  static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
@@ -45316,11 +45417,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
    case X86ISD::FNMADD_RND:
    case X86ISD::FNMSUB:
    case X86ISD::FNMSUB_RND:
-  case ISD::FMA: return combineFMA(N, DAG, Subtarget);
+  case ISD::FMA: return combineFMA(N, DAG, DCI, Subtarget);
    case X86ISD::FMADDSUB_RND:
    case X86ISD::FMSUBADD_RND:
    case X86ISD::FMADDSUB:
-  case X86ISD::FMSUBADD:    return combineFMADDSUB(N, DAG, Subtarget);
+  case X86ISD::FMSUBADD:    return combineFMADDSUB(N, DAG, DCI);
    case X86ISD::MOVMSK:      return combineMOVMSK(N, DAG, DCI, Subtarget);
    case X86ISD::MGATHER:
    case X86ISD::MSCATTER:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 4470de2d37806567d66e07abad7efbe0b5e0a7a0..2cd49e4778229e019b0a37bd3d580c660b9068fa 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -798,6 +798,17 @@ namespace llvm {
      /// and some i16 instructions are slow.
      bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
  
+    /// Return 1 if we can compute the negated form of the specified expression
+    /// for the same cost as the expression itself, or 2 if we can compute the
+    /// negated form more cheaply than the expression itself. Else return 0.
+    char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
+                            bool ForCodeSize, unsigned Depth) const override;
+
+    /// If isNegatibleForFree returns true, return the newly negated expression.
+    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+                                 bool LegalOperations, bool ForCodeSize,
+                                 unsigned Depth) const override;
+
      MachineBasicBlock *
      EmitInstrWithCustomInserter(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const override;
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll

index c618c37e4fea735c46bc0117f2114b4206f67110..5d05bd401e24c036d39fd9f9867ae0103d8299d4 100644 (file)
--- a/test/CodeGen/X86/recip-fastmath.ll
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -60,15 +60,15 @@ define float @f32_one_step(float %x) #1 {
  ; FMA-RECIP-LABEL: f32_one_step:
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; FMA-RECIP-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; FMA-RECIP-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; FMA-RECIP-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; FMA-RECIP-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; FMA-RECIP-NEXT:    retq
  ;
  ; BDVER2-LABEL: f32_one_step:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; BDVER2-NEXT:    vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
-; BDVER2-NEXT:    vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT:    vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT:    vfnmaddss %xmm1, %xmm0, %xmm1, %xmm0
  ; BDVER2-NEXT:    retq
  ;
  ; BTVER2-LABEL: f32_one_step:
@@ -94,8 +94,8 @@ define float @f32_one_step(float %x) #1 {
  ; HASWELL-LABEL: f32_one_step:
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; HASWELL-NEXT:    retq
  ;
  ; HASWELL-NO-FMA-LABEL: f32_one_step:
@@ -111,8 +111,8 @@ define float @f32_one_step(float %x) #1 {
  ; AVX512-LABEL: f32_one_step:
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; AVX512-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; AVX512-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; AVX512-NEXT:    retq
    %div = fdiv fast float 1.0, %x
    ret float %div
diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll

index a2bd6c2081c1d626ad8b395b38d7aca6a5ce4da9..c5e364280b802ce2766441d27e29dd3f7d0725f7 100644 (file)
--- a/test/CodeGen/X86/recip-fastmath2.ll
+++ b/test/CodeGen/X86/recip-fastmath2.ll
@@ -154,8 +154,8 @@ define float @f32_one_step_2_divs(float %x) #1 {
  ; FMA-RECIP-LABEL: f32_one_step_2_divs:
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; FMA-RECIP-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; FMA-RECIP-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; FMA-RECIP-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; FMA-RECIP-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; FMA-RECIP-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
  ; FMA-RECIP-NEXT:    vmulss %xmm0, %xmm1, %xmm0
  ; FMA-RECIP-NEXT:    retq
@@ -163,8 +163,8 @@ define float @f32_one_step_2_divs(float %x) #1 {
  ; BDVER2-LABEL: f32_one_step_2_divs:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; BDVER2-NEXT:    vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
-; BDVER2-NEXT:    vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT:    vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT:    vfnmaddss %xmm1, %xmm0, %xmm1, %xmm0
  ; BDVER2-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
  ; BDVER2-NEXT:    vmulss %xmm0, %xmm1, %xmm0
  ; BDVER2-NEXT:    retq
@@ -196,8 +196,8 @@ define float @f32_one_step_2_divs(float %x) #1 {
  ; HASWELL-LABEL: f32_one_step_2_divs:
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; HASWELL-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
  ; HASWELL-NEXT:    vmulss %xmm0, %xmm1, %xmm0
  ; HASWELL-NEXT:    retq
@@ -217,8 +217,8 @@ define float @f32_one_step_2_divs(float %x) #1 {
  ; AVX512-LABEL: f32_one_step_2_divs:
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
-; AVX512-NEXT:    vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem
+; AVX512-NEXT:    vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
  ; AVX512-NEXT:    vmulss {{.*}}(%rip), %xmm0, %xmm1
  ; AVX512-NEXT:    vmulss %xmm0, %xmm1, %xmm0
  ; AVX512-NEXT:    retq
@@ -267,8 +267,8 @@ define float @f32_two_step_2(float %x) #2 {
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
  ; FMA-RECIP-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; FMA-RECIP-NEXT:    vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; FMA-RECIP-NEXT:    vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; FMA-RECIP-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; FMA-RECIP-NEXT:    vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; FMA-RECIP-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  ; FMA-RECIP-NEXT:    vmulss %xmm1, %xmm2, %xmm3
  ; FMA-RECIP-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -278,9 +278,9 @@ define float @f32_two_step_2(float %x) #2 {
  ; BDVER2-LABEL: f32_two_step_2:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
-; BDVER2-NEXT:    vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm2
+; BDVER2-NEXT:    vfmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm2
  ; BDVER2-NEXT:    vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
-; BDVER2-NEXT:    vfmaddss %xmm1, %xmm2, %xmm1, %xmm1
+; BDVER2-NEXT:    vfnmaddss %xmm1, %xmm2, %xmm1, %xmm1
  ; BDVER2-NEXT:    vmulss %xmm4, %xmm1, %xmm3
  ; BDVER2-NEXT:    vfnmaddss %xmm4, %xmm3, %xmm0, %xmm0
  ; BDVER2-NEXT:    vfmaddss %xmm3, %xmm0, %xmm1, %xmm0
@@ -322,8 +322,8 @@ define float @f32_two_step_2(float %x) #2 {
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
  ; HASWELL-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; HASWELL-NEXT:    vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; HASWELL-NEXT:    vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; HASWELL-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; HASWELL-NEXT:    vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; HASWELL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  ; HASWELL-NEXT:    vmulss %xmm1, %xmm2, %xmm3
  ; HASWELL-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -350,8 +350,8 @@ define float @f32_two_step_2(float %x) #2 {
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
  ; AVX512-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT:    vfnmadd231ss {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; AVX512-NEXT:    vfmadd132ss {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; AVX512-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; AVX512-NEXT:    vfnmadd132ss {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
  ; AVX512-NEXT:    vmulss %xmm1, %xmm2, %xmm3
  ; AVX512-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -610,9 +610,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
  ; FMA-RECIP-LABEL: v4f32_two_step2:
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpps %xmm0, %xmm1
-; FMA-RECIP-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-RECIP-NEXT:    vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; FMA-RECIP-NEXT:    vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; FMA-RECIP-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-RECIP-NEXT:    vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; FMA-RECIP-NEXT:    vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; FMA-RECIP-NEXT:    vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
  ; FMA-RECIP-NEXT:    vmulps %xmm1, %xmm2, %xmm3
  ; FMA-RECIP-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -622,9 +622,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
  ; BDVER2-LABEL: v4f32_two_step2:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpps %xmm0, %xmm1
-; BDVER2-NEXT:    vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm2
+; BDVER2-NEXT:    vfmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm2
  ; BDVER2-NEXT:    vmovaps {{.*#+}} xmm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
-; BDVER2-NEXT:    vfmaddps %xmm1, %xmm2, %xmm1, %xmm1
+; BDVER2-NEXT:    vfnmaddps %xmm1, %xmm2, %xmm1, %xmm1
  ; BDVER2-NEXT:    vmulps %xmm4, %xmm1, %xmm3
  ; BDVER2-NEXT:    vfnmaddps %xmm4, %xmm3, %xmm0, %xmm0
  ; BDVER2-NEXT:    vfmaddps %xmm3, %xmm0, %xmm1, %xmm0
@@ -665,9 +665,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
  ; HASWELL-LABEL: v4f32_two_step2:
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpps %xmm0, %xmm1
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; HASWELL-NEXT:    vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; HASWELL-NEXT:    vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; HASWELL-NEXT:    vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
  ; HASWELL-NEXT:    vmulps %xmm1, %xmm2, %xmm3
  ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -693,9 +693,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
  ; AVX512-LABEL: v4f32_two_step2:
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcpps %xmm0, %xmm1
-; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-NEXT:    vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
-; AVX512-NEXT:    vfmadd132ps {{.*#+}} xmm2 = (xmm2 * xmm1) + xmm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT:    vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
+; AVX512-NEXT:    vfnmadd132ps {{.*#+}} xmm2 = -(xmm2 * xmm1) + xmm1
  ; AVX512-NEXT:    vmovaps {{.*#+}} xmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0]
  ; AVX512-NEXT:    vmulps %xmm1, %xmm2, %xmm3
  ; AVX512-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm1
@@ -987,9 +987,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
  ; FMA-RECIP-LABEL: v8f32_two_step2:
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpps %ymm0, %ymm1
-; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; FMA-RECIP-NEXT:    vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2
-; FMA-RECIP-NEXT:    vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1
+; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; FMA-RECIP-NEXT:    vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2
+; FMA-RECIP-NEXT:    vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1
  ; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; FMA-RECIP-NEXT:    vmulps %ymm1, %ymm2, %ymm3
  ; FMA-RECIP-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1
@@ -999,9 +999,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
  ; BDVER2-LABEL: v8f32_two_step2:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpps %ymm0, %ymm1
-; BDVER2-NEXT:    vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm2
+; BDVER2-NEXT:    vfmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm2
  ; BDVER2-NEXT:    vmovaps {{.*#+}} ymm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
-; BDVER2-NEXT:    vfmaddps %ymm1, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT:    vfnmaddps %ymm1, %ymm2, %ymm1, %ymm1
  ; BDVER2-NEXT:    vmulps %ymm4, %ymm1, %ymm3
  ; BDVER2-NEXT:    vfnmaddps %ymm4, %ymm3, %ymm0, %ymm0
  ; BDVER2-NEXT:    vfmaddps %ymm3, %ymm0, %ymm1, %ymm0
@@ -1042,9 +1042,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
  ; HASWELL-LABEL: v8f32_two_step2:
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpps %ymm0, %ymm1
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2
-; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2
+; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1
  ; HASWELL-NEXT:    vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; HASWELL-NEXT:    vmulps %ymm1, %ymm2, %ymm3
  ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1
@@ -1070,9 +1070,9 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
  ; AVX512-LABEL: v8f32_two_step2:
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcpps %ymm0, %ymm1
-; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-NEXT:    vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2
-; AVX512-NEXT:    vfmadd132ps {{.*#+}} ymm2 = (ymm2 * ymm1) + ymm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT:    vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2
+; AVX512-NEXT:    vfnmadd132ps {{.*#+}} ymm2 = -(ymm2 * ymm1) + ymm1
  ; AVX512-NEXT:    vmovaps {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; AVX512-NEXT:    vmulps %ymm1, %ymm2, %ymm3
  ; AVX512-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm1
@@ -1552,17 +1552,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
  ; FMA-RECIP-LABEL: v16f32_two_step2:
  ; FMA-RECIP:       # %bb.0:
  ; FMA-RECIP-NEXT:    vrcpps %ymm0, %ymm2
-; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
  ; FMA-RECIP-NEXT:    vmovaps %ymm2, %ymm4
-; FMA-RECIP-NEXT:    vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
-; FMA-RECIP-NEXT:    vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; FMA-RECIP-NEXT:    vfmadd213ps {{.*#+}} ymm4 = (ymm0 * ymm4) + ymm3
+; FMA-RECIP-NEXT:    vfnmadd132ps {{.*#+}} ymm4 = -(ymm4 * ymm2) + ymm2
  ; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; FMA-RECIP-NEXT:    vmulps %ymm2, %ymm4, %ymm5
  ; FMA-RECIP-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm5 * ymm0) + ymm2
  ; FMA-RECIP-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm5
  ; FMA-RECIP-NEXT:    vrcpps %ymm1, %ymm2
-; FMA-RECIP-NEXT:    vfnmadd231ps {{.*#+}} ymm3 = -(ymm1 * ymm2) + ymm3
-; FMA-RECIP-NEXT:    vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm2) + ymm2
+; FMA-RECIP-NEXT:    vfmadd231ps {{.*#+}} ymm3 = (ymm1 * ymm2) + ymm3
+; FMA-RECIP-NEXT:    vfnmadd132ps {{.*#+}} ymm3 = -(ymm3 * ymm2) + ymm2
  ; FMA-RECIP-NEXT:    vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
  ; FMA-RECIP-NEXT:    vmulps %ymm2, %ymm3, %ymm4
  ; FMA-RECIP-NEXT:    vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm2
@@ -1572,17 +1572,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
  ; BDVER2-LABEL: v16f32_two_step2:
  ; BDVER2:       # %bb.0:
  ; BDVER2-NEXT:    vrcpps %ymm0, %ymm2
-; BDVER2-NEXT:    vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; BDVER2-NEXT:    vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
-; BDVER2-NEXT:    vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT:    vmovaps {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; BDVER2-NEXT:    vfmaddps %ymm3, %ymm2, %ymm0, %ymm4
+; BDVER2-NEXT:    vfnmaddps %ymm2, %ymm4, %ymm2, %ymm2
  ; BDVER2-NEXT:    vmovaps {{.*#+}} ymm4 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; BDVER2-NEXT:    vmulps %ymm4, %ymm2, %ymm5
  ; BDVER2-NEXT:    vfnmaddps %ymm4, %ymm5, %ymm0, %ymm0
  ; BDVER2-NEXT:    vfmaddps %ymm5, %ymm0, %ymm2, %ymm0
  ; BDVER2-NEXT:    vrcpps %ymm1, %ymm2
  ; BDVER2-NEXT:    vmovaps {{.*#+}} ymm5 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
-; BDVER2-NEXT:    vfnmaddps %ymm3, %ymm2, %ymm1, %ymm3
-; BDVER2-NEXT:    vfmaddps %ymm2, %ymm3, %ymm2, %ymm2
+; BDVER2-NEXT:    vfmaddps %ymm3, %ymm2, %ymm1, %ymm3
+; BDVER2-NEXT:    vfnmaddps %ymm2, %ymm3, %ymm2, %ymm2
  ; BDVER2-NEXT:    vmulps %ymm5, %ymm2, %ymm4
  ; BDVER2-NEXT:    vfnmaddps %ymm5, %ymm4, %ymm1, %ymm1
  ; BDVER2-NEXT:    vfmaddps %ymm4, %ymm1, %ymm2, %ymm1
@@ -1645,17 +1645,17 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
  ; HASWELL-LABEL: v16f32_two_step2:
  ; HASWELL:       # %bb.0:
  ; HASWELL-NEXT:    vrcpps %ymm0, %ymm2
-; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT:    vbroadcastss {{.*#+}} ymm3 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
  ; HASWELL-NEXT:    vmovaps %ymm2, %ymm4
-; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
-; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm4 = (ymm0 * ymm4) + ymm3
+; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm4 = -(ymm4 * ymm2) + ymm2
  ; HASWELL-NEXT:    vmovaps {{.*#+}} ymm2 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0]
  ; HASWELL-NEXT:    vmulps %ymm2, %ymm4, %ymm5
  ; HASWELL-NEXT:    vrcpps %ymm1, %ymm6
  ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm5 * ymm0) + ymm2
  ; HASWELL-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm4 * ymm0) + ymm5
-; HASWELL-NEXT:    vfnmadd231ps {{.*#+}} ymm3 = -(ymm1 * ymm6) + ymm3
-; HASWELL-NEXT:    vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm6) + ymm6
+; HASWELL-NEXT:    vfmadd231ps {{.*#+}} ymm3 = (ymm1 * ymm6) + ymm3
+; HASWELL-NEXT:    vfnmadd132ps {{.*#+}} ymm3 = -(ymm3 * ymm6) + ymm6
  ; HASWELL-NEXT:    vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
  ; HASWELL-NEXT:    vmulps %ymm2, %ymm3, %ymm4
  ; HASWELL-NEXT:    vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm2
@@ -1692,9 +1692,9 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
  ; AVX512-LABEL: v16f32_two_step2:
  ; AVX512:       # %bb.0:
  ; AVX512-NEXT:    vrcp14ps %zmm0, %zmm1
-; AVX512-NEXT:    vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX512-NEXT:    vfnmadd231ps {{.*#+}} zmm2 = -(zmm0 * zmm1) + zmm2
-; AVX512-NEXT:    vfmadd132ps {{.*#+}} zmm2 = (zmm2 * zmm1) + zmm1
+; AVX512-NEXT:    vbroadcastss {{.*#+}} zmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
+; AVX512-NEXT:    vfmadd231ps {{.*#+}} zmm2 = (zmm0 * zmm1) + zmm2
+; AVX512-NEXT:    vfnmadd132ps {{.*#+}} zmm2 = -(zmm2 * zmm1) + zmm1
  ; AVX512-NEXT:    vmovaps {{.*#+}} zmm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0,8.0E+0,9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
  ; AVX512-NEXT:    vmulps %zmm1, %zmm2, %zmm3
  ; AVX512-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm1
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Thu, 19 Sep 2019 15:02:47 +0000 (15:02 +0000)
include/llvm/CodeGen/TargetLowering.h		patch \| blob \| history
lib/CodeGen/SelectionDAG/DAGCombiner.cpp		patch \| blob \| history
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/CodeGen/X86/recip-fastmath.ll		patch \| blob \| history
test/CodeGen/X86/recip-fastmath2.ll		patch \| blob \| history