[LoopVectorize] Add FNeg instruction support

author Craig Topper <craig.topper@intel.com>

Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)

committer Craig Topper <craig.topper@intel.com>

Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)
author Craig Topper <craig.topper@intel.com>
Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)
committer Craig Topper <craig.topper@intel.com>
Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h

index d052666354f79864a89dec1ebf803e7bd534d182..6cda431a3b8528d86c6197c3194ab0bcd9736ccb 100644 (file)
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -1383,6 +1383,24 @@ public:
      return Insert(UnOp, Name);
    }
  
+  /// Create a UnaryOperator or BinaryOperator depending on \p Opc; \p Ops must
+  /// hold exactly one operand for a unary opcode and two for a binary opcode.
+  Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
+                      const Twine &Name = "",
+                      MDNode *FPMathTag = nullptr) {
+    if (Instruction::isBinaryOp(Opc)) {
+      assert(Ops.size() == 2 && "Invalid number of operands!");
+      return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
+                         Ops[0], Ops[1], Name, FPMathTag);
+    }
+    if (Instruction::isUnaryOp(Opc)) {
+      assert(Ops.size() == 1 && "Invalid number of operands!");
+      return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
+                        Ops[0], Name, FPMathTag);
+    }
+    llvm_unreachable("Unexpected opcode!");
+  }
+
    //===--------------------------------------------------------------------===//
    // Instruction creation methods: Memory Instructions
    //===--------------------------------------------------------------------===//
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp

index ad3030cedc30bcd8af9a09184bc653ba1c26a7ec..a43a76724c894e2898af40447a7d2613f9269f7e 100644 (file)
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3969,6 +3969,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
+  case Instruction::FNeg:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::FDiv:
@@ -3979,21 +3980,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
-    // Just widen binops.
-    auto *BinOp = cast<BinaryOperator>(&I);
-    setDebugLocFromInst(Builder, BinOp);
+    // Just widen unops and binops.
+    setDebugLocFromInst(Builder, &I);
  
      for (unsigned Part = 0; Part < UF; ++Part) {
-      Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
-      Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
-      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+      SmallVector<Value *, 2> Ops;
+      for (Value *Op : I.operands())
+        Ops.push_back(getOrCreateVectorValue(Op, Part));
+
+      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
  
-      if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
-        VecOp->copyIRFlags(BinOp);
+      if (auto *VecOp = dyn_cast<Instruction>(V))
+        VecOp->copyIRFlags(&I);
  
        // Use this vector value for all users of the original instruction.
        VectorLoopValueMap.setVectorValue(&I, Part, V);
-      addMetadata(V, BinOp);
+      addMetadata(V, &I);
      }
  
      break;
@@ -5960,6 +5962,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                     I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
                     Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
    }
+  case Instruction::FNeg: {
+    unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
+    return N * TTI.getArithmeticInstrCost(
+                   I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OK_AnyValue,
+                   TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+                   I->getOperand(0));
+  }
    case Instruction::Select: {
      SelectInst *SI = cast<SelectInst>(I);
      const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -6589,6 +6599,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
      case Instruction::FCmp:
      case Instruction::FDiv:
      case Instruction::FMul:
+    case Instruction::FNeg:
      case Instruction::FPExt:
      case Instruction::FPToSI:
      case Instruction::FPToUI:
diff --git a/test/Transforms/LoopVectorize/X86/fneg-cost.ll b/test/Transforms/LoopVectorize/X86/fneg-cost.ll

index 6589871674291c5ab755d83d04f5c54c692abd2d..5aedf451ed20af6870cf797b240133e4b74d72c3 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/fneg-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/fneg-cost.ll
@@ -5,9 +5,9 @@
  target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
  target triple = "x86_64-apple-macosx10.8.0"
  
-; CHECK: Found an estimated cost of 2 for VF 1 For instruction:   %neg = fneg float %{{.*}}
-; CHECK: Found an estimated cost of 6 for VF 2 For instruction:   %neg = fneg float %{{.*}}
-; CHECK: Found an estimated cost of 14 for VF 4 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 1 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 2 For instruction:   %neg = fneg float %{{.*}}
+; CHECK: Found an estimated cost of 4 for VF 4 For instruction:   %neg = fneg float %{{.*}}
  define void @fneg_cost(float* %a, i64 %n) {
  entry:
    br label %for.body
diff --git a/test/Transforms/LoopVectorize/fneg.ll b/test/Transforms/LoopVectorize/fneg.ll

index 8e5e2aae9fdd3f408b3a3db51aaa2010bec532cc..103e795b2115f33c44e29d676f909d050c60f1ce 100644 (file)
--- a/test/Transforms/LoopVectorize/fneg.ll
+++ b/test/Transforms/LoopVectorize/fneg.ll
@@ -3,19 +3,8 @@
  define void @foo(float* %a, i64 %n) {
  ; CHECK:       vector.body:
  ; CHECK:         [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = fneg float [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
-; CHECK-NEXT:    [[TMP7:%.*]] = fneg float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
-; CHECK-NEXT:    [[TMP9:%.*]] = fneg float [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
-; CHECK-NEXT:    [[TMP11:%.*]] = fneg float [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
-; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
-; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
-; CHECK:         store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
+; CHECK:         store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
  ;
  entry:
    br label %for.body
author	Craig Topper <craig.topper@intel.com>
	Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)
committer	Craig Topper <craig.topper@intel.com>
	Thu, 30 May 2019 18:19:35 +0000 (18:19 +0000)
include/llvm/IR/IRBuilder.h		patch \| blob \| history
lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
test/Transforms/LoopVectorize/X86/fneg-cost.ll		patch \| blob \| history
test/Transforms/LoopVectorize/fneg.ll		patch \| blob \| history