""""""""""
The first argument to this intrinsic is a scalar accumulator value, which is
only used when there are no fast-math flags attached. This argument may be undef
-when fast-math flags are used.
+when fast-math flags are used. The type of the accumulator matches the
+element-type of the vector input.
The second argument must be a vector of floating-point values.
""""""""""
The first argument to this intrinsic is a scalar accumulator value, which is
only used when there are no fast-math flags attached. This argument may be undef
-when fast-math flags are used.
+when fast-math flags are used. The type of the accumulator matches the
+element-type of the vector input.
The second argument must be a vector of floating-point values.
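For illustration, a minimal IR sketch of the updated signature (the function and value names below are hypothetical, not taken from the patch): the accumulator has the same scalar type as the vector elements, and it may be undef when fast-math flags are attached.

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)

define float @reduce_ordered(float %acc, <4 x float> %v) {
  ; Ordered reduction: the scalar accumulator participates in the result.
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %v)
  ret float %r
}

define float @reduce_fast(<4 x float> %v) {
  ; Fast-math reduction: the accumulator is ignored and may be undef.
  %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %v)
  ret float %r
}

The fmul reduction takes the same form, with a scalar start value of the element type.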
//===------------------------ Reduction Intrinsics ------------------------===//
//
def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyfloat_ty,
+ [LLVMMatchType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;
def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty],
- [llvm_anyfloat_ty,
+ [LLVMMatchType<0>,
llvm_anyvector_ty],
[IntrNoMem]>;
def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty],
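Tying the accumulator to the overloaded result type with LLVMMatchType<0> also drops the separate accumulator suffix from the mangled intrinsic names. A sketch of the resulting declarations, based on the test updates later in this patch:

declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)     ; previously @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32
declare double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double, <4 x double>)  ; previously @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64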
CallInst *IRBuilderBase::CreateFAddReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
- Src->getType()};
+ Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
M, Intrinsic::experimental_vector_reduce_fadd, Tys);
return createCallHelper(Decl, Ops, this);
}

CallInst *IRBuilderBase::CreateFMulReduce(Value *Acc, Value *Src) {
Module *M = GetInsertBlock()->getParent()->getParent();
Value *Ops[] = {Acc, Src};
- Type *Tys[] = {Src->getType()->getVectorElementType(), Acc->getType(),
- Src->getType()};
+ Type *Tys[] = {Acc->getType(), Src->getType()};
auto Decl = Intrinsic::getDeclaration(
M, Intrinsic::experimental_vector_reduce_fmul, Tys);
return createCallHelper(Decl, Ops, this);
}
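With the shortened overload list {Acc->getType(), Src->getType()}, these helpers resolve declarations mangled only on the result and vector types. For a double accumulator and a <2 x double> source, CreateFMulReduce would produce IR along these lines (a sketch; the value names are placeholders):

declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double, <2 x double>)

  %rdx = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %acc, <2 x double> %src)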
--- /dev/null
+; RUN: not opt -S < %s 2>&1 | FileCheck %s
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: float (double, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64
+define float @fadd_invalid_scalar_res(double %acc, <2 x double> %in) {
+ %res = call float @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in)
+ ret float %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: double (float, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64
+define double @fadd_invalid_scalar_start(float %acc, <2 x double> %in) {
+ %res = call double @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in)
+ ret double %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: <2 x double> (double, <2 x double>)* @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64
+define <2 x double> @fadd_invalid_vector_res(double %acc, <2 x double> %in) {
+ %res = call <2 x double> @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in)
+ ret <2 x double> %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: double (<2 x double>, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64
+define double @fadd_invalid_vector_start(<2 x double> %in, <2 x double> %acc) {
+ %res = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in)
+ ret double %res
+}
+
+declare float @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in)
+declare double @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in)
+declare <2 x double> @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in)
; CHECK-LABEL: add_HalfS:
; CHECK: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(<2 x float> undef, <2 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %bin.rdx)
ret float %r
}
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v4f16(<4 x half> undef, <4 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v4f16(half undef, <4 x half> %bin.rdx)
ret half %r
}
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v8f16(<8 x half> undef, <8 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half undef, <8 x half> %bin.rdx)
ret half %r
}
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(<4 x float> undef, <4 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %bin.rdx)
ret float %r
}
; CHECK-LABEL: add_D:
; CHECK: faddp d0, v0.2d
; CHECK-NEXT: ret
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(<2 x double> undef, <2 x double> %bin.rdx)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %bin.rdx)
ret double %r
}
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v16f16(<16 x half> undef, <16 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v16f16(half undef, <16 x half> %bin.rdx)
ret half %r
}
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(<8 x float> undef, <8 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %bin.rdx)
ret float %r
}
; CHECK: fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(<4 x double> undef, <4 x double> %bin.rdx)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %bin.rdx)
ret double %r
}
; Function Attrs: nounwind readnone
-declare half @llvm.experimental.vector.reduce.fadd.f16.v4f16(<4 x half>, <4 x half>)
-declare half @llvm.experimental.vector.reduce.fadd.f16.v8f16(<8 x half>, <8 x half>)
-declare half @llvm.experimental.vector.reduce.fadd.f16.v16f16(<16 x half>, <16 x half>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(<2 x float>, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(<4 x float>, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(<8 x float>, <8 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(<2 x double>, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(<4 x double>, <4 x double>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
; Repeat tests from general reductions to verify output for hoppy targets:
; PR38971: https://bugs.llvm.org/show_bug.cgi?id=38971
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) {
; SSE3-SLOW-LABEL: fadd_reduce_v8f32:
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %r
}
; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %r
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
ret double %1
}
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double, <16 x double>)