[LoopUtils][SLPVectorizer] clean up management of fast-math-flags

author Sanjay Patel <spatel@rotateright.com>
Wed, 5 Jun 2019 14:58:04 +0000 (14:58 +0000)
committer Sanjay Patel <spatel@rotateright.com>
Wed, 5 Jun 2019 14:58:04 +0000 (14:58 +0000)
diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h

index 1aad257e0870225f4a31f22099420fdfead4c67e..2f2365ad5c3a54e5a8404815f33055a122ba555a 100644 (file)
--- a/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/include/llvm/Transforms/Utils/LoopUtils.h
@@ -299,10 +299,10 @@ getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op,
                      ArrayRef<Value *> RedOps = None);
  
  /// Generates a vector reduction using shufflevectors to reduce the value.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
  Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                             RecurrenceDescriptor::MinMaxRecurrenceKind
                                 MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
-                           FastMathFlags FMF = FastMathFlags(),
                             ArrayRef<Value *> RedOps = None);
  
  /// Create a target reduction of the given vector. The reduction operation
@@ -310,17 +310,18 @@ Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
  /// additional information supplied in \p Flags.
  /// The target is queried to determine if intrinsics or shuffle sequences are
  /// required to implement the reduction.
+/// Fast-math-flags are propagated using the IRBuilder's setting.
  Value *createSimpleTargetReduction(IRBuilder<> &B,
                                     const TargetTransformInfo *TTI,
                                     unsigned Opcode, Value *Src,
                                     TargetTransformInfo::ReductionFlags Flags =
                                         TargetTransformInfo::ReductionFlags(),
-                                   FastMathFlags FMF = FastMathFlags(),
                                     ArrayRef<Value *> RedOps = None);
  
  /// Create a generic target reduction using a recurrence descriptor \p Desc
  /// The target is queried to determine if intrinsics or shuffle sequences are
  /// required to implement the reduction.
+/// Fast-math-flags are propagated using the RecurrenceDescriptor.
  Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
                               RecurrenceDescriptor &Desc, Value *Src,
                               bool NoNaN = false);
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp

index 34858883298edfb2fa22364098e44bca2ecb262e..340ee19c3391094538c7c1507381ae619335fd98 100644 (file)
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -118,11 +118,14 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
      }
      if (!TTI->shouldExpandReduction(II))
        continue;
+    // Propagate FMF using the builder.
      FastMathFlags FMF =
          isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+    IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+    Builder.setFastMathFlags(FMF);
      Value *Rdx =
          IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
-                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK, FMF);
+                  : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
      II->replaceAllUsesWith(Rdx);
      II->eraseFromParent();
      Changed = true;
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp

index 16971b2edbb6cdfbfa613e4234bd718941f9ff82..03d84c39b66fb53815f4b240f895a74b0e1c025c 100644 (file)
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -675,12 +675,6 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
    return true;
  }
  
-static Value *addFastMathFlag(Value *V, FastMathFlags FMF) {
-  if (isa<FPMathOperator>(V))
-    cast<Instruction>(V)->setFastMathFlags(FMF);
-  return V;
-}
-
  Value *llvm::createMinMaxOp(IRBuilder<> &Builder,
                              RecurrenceDescriptor::MinMaxRecurrenceKind RK,
                              Value *Left, Value *Right) {
@@ -761,7 +755,7 @@ llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src,
  Value *
  llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
                            RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
-                          FastMathFlags FMF, ArrayRef<Value *> RedOps) {
+                          ArrayRef<Value *> RedOps) {
    unsigned VF = Src->getType()->getVectorNumElements();
    // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
    // and vector ops, reducing the set of values being computed by half each
@@ -784,10 +778,9 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
          ConstantVector::get(ShuffleMask), "rdx.shuf");
  
      if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
-      // Floating point operations had to be 'fast' to enable the reduction.
-      TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
-                                                   TmpVec, Shuf, "bin.rdx"),
-                               FMF);
+      // The builder propagates its fast-math-flags setting.
+      TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf,
+                                   "bin.rdx");
      } else {
        assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
               "Invalid min/max");
@@ -804,7 +797,7 @@ llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
  /// flags (if generating min/max reductions).
  Value *llvm::createSimpleTargetReduction(
      IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
-    Value *Src, TargetTransformInfo::ReductionFlags Flags, FastMathFlags FMF,
+    Value *Src, TargetTransformInfo::ReductionFlags Flags,
      ArrayRef<Value *> RedOps) {
    assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
  
@@ -874,7 +867,7 @@ Value *llvm::createSimpleTargetReduction(
    }
    if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
      return BuildFunc();
-  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, FMF, RedOps);
+  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
  }
  
  /// Create a vector reduction using a given recurrence descriptor.
@@ -887,39 +880,36 @@ Value *llvm::createTargetReduction(IRBuilder<> &B,
    RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
    TargetTransformInfo::ReductionFlags Flags;
    Flags.NoNaN = NoNaN;
+
+  // All ops in the reduction inherit fast-math-flags from the recurrence
+  // descriptor.
+  IRBuilder<>::FastMathFlagGuard FMFGuard(B);
+  B.setFastMathFlags(Desc.getFastMathFlags());
+
    switch (RecKind) {
    case RD::RK_FloatAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
    case RD::RK_FloatMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
    case RD::RK_IntegerAdd:
-    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
    case RD::RK_IntegerMult:
-    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
    case RD::RK_IntegerAnd:
-    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
    case RD::RK_IntegerOr:
-    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
    case RD::RK_IntegerXor:
-    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
    case RD::RK_IntegerMinMax: {
      RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
      Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
      Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
-    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
    }
    case RD::RK_FloatMinMax: {
      Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
-    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags,
-                                       Desc.getFastMathFlags());
+    return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
    }
    default:
      llvm_unreachable("Unhandled RecKind");
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp

index c0c2c85b8f48336d074b0580af6ef3b0376136b3..72fc9cf41ef3b62eeb22e18652035fbbecb0c9fc 100644 (file)
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6105,6 +6105,9 @@ public:
      unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
  
      Value *VectorizedTree = nullptr;
+
+    // FIXME: Fast-math-flags should be set based on the instructions in the
+    //        reduction (not all of 'fast' are required).
      IRBuilder<> Builder(cast<Instruction>(ReductionRoot));
      FastMathFlags Unsafe;
      Unsafe.setFast();
@@ -6294,11 +6297,14 @@ private:
      assert(isPowerOf2_32(ReduxWidth) &&
             "We only handle power-of-two reductions for now");
  
-    if (!IsPairwiseReduction)
+    if (!IsPairwiseReduction) {
+      // FIXME: The builder should use an FMF guard. It should not be hard-coded
+      //        to 'fast'.
+      assert(Builder.getFastMathFlags().isFast() && "Expected 'fast' FMF");
        return createSimpleTargetReduction(
            Builder, TTI, ReductionData.getOpcode(), VectorizedValue,
-          ReductionData.getFlags(), FastMathFlags::getFast(),
-          ReductionOps.back());
+          ReductionData.getFlags(), ReductionOps.back());
+    }
  
      Value *TmpVec = VectorizedValue;
      for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) {
author	Sanjay Patel <spatel@rotateright.com>
	Wed, 5 Jun 2019 14:58:04 +0000 (14:58 +0000)
committer	Sanjay Patel <spatel@rotateright.com>
	Wed, 5 Jun 2019 14:58:04 +0000 (14:58 +0000)
include/llvm/Transforms/Utils/LoopUtils.h		patch | blob | history
lib/CodeGen/ExpandReductions.cpp		patch | blob | history
lib/Transforms/Utils/LoopUtils.cpp		patch | blob | history
lib/Transforms/Vectorize/SLPVectorizer.cpp		patch | blob | history