From ad059a3d8bdbd45b6e994f854dc79624168442dd Mon Sep 17 00:00:00 2001
From: Adam Nemet <anemet@apple.com>
Date: Wed, 13 Aug 2014 00:29:01 +0000
Subject: [PATCH] [AVX512] Add intrinsics for FP scalar broadcasts

The same approach as for the set1 intrinsics is used: the intrinsics are
implemented with vector initializers, and an LLVM test then ensures that a
broadcast is generated in the end.

Part of <rdar://problem/17688758>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@215486 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Headers/avx512fintrin.h     | 18 ++++++++++++++++++
 test/CodeGen/avx512f-builtins.c | 14 ++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index ad92fe7b64..1b558a0404 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -117,6 +117,24 @@ _mm512_set1_epi64(long long __d)
   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastss_ps(__m128 __X)
+{ /* Returns a vector whose 16 float elements all equal element 0 of __X. */
+  float __f = __X[0]; /* only the lowest element of the source is read */
+  return (__v16sf){ __f, __f, __f, __f, /* plain initializer, no builtin call */
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f };
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastsd_pd(__m128d __X)
+{ /* Returns a vector whose 8 double elements all equal element 0 of __X. */
+  double __d = __X[0]; /* only the lowest element of the source is read */
+  return (__v8df){ __d, __d, __d, __d, /* plain initializer, no builtin call */
+                   __d, __d, __d, __d };
+}
+
 /* Cast between vector types */
 
 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 35d79a8bff..b5b4ff2391 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -137,3 +137,17 @@ __m512i test_mm512_valign_epi64(__m512i a, __m512i b)
   // CHECK: @llvm.x86.avx512.mask.valign.q.512
   return _mm512_valign_epi64(a, b, 2);
 }
+
+__m512d test_mm512_broadcastsd_pd(__m128d a)
+{ // Expects the emitted IR to insert into each of the 8 result lanes in order.
+  // CHECK-LABEL: @test_mm512_broadcastsd_pd
+  // CHECK: insertelement <8 x double> {{.*}}, i32 0
+  // CHECK: insertelement <8 x double> {{.*}}, i32 1
+  // CHECK: insertelement <8 x double> {{.*}}, i32 2
+  // CHECK: insertelement <8 x double> {{.*}}, i32 3
+  // CHECK: insertelement <8 x double> {{.*}}, i32 4
+  // CHECK: insertelement <8 x double> {{.*}}, i32 5
+  // CHECK: insertelement <8 x double> {{.*}}, i32 6
+  // CHECK: insertelement <8 x double> {{.*}}, i32 7
+  return _mm512_broadcastsd_pd(a); // intrinsic under test
+}
-- 
2.40.0