From ad059a3d8bdbd45b6e994f854dc79624168442dd Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Wed, 13 Aug 2014 00:29:01 +0000
Subject: [PATCH] [AVX512] Add intrinsics for FP scalar broadcasts

Similar approach to the set1 intrinsics is used: implement in terms of vector
initializers and then ensure with an LLVM test that a broadcast is generated at
the end.

Part of

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@215486 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Headers/avx512fintrin.h     | 18 ++++++++++++++++++
 test/CodeGen/avx512f-builtins.c | 14 ++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index ad92fe7b64..1b558a0404 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -117,6 +117,24 @@ _mm512_set1_epi64(long long __d)
   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastss_ps(__m128 __X)
+{
+  float __f = __X[0];
+  return (__v16sf){ __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f };
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastsd_pd(__m128d __X)
+{
+  double __d = __X[0];
+  return (__v8df){ __d, __d, __d, __d,
+                   __d, __d, __d, __d };
+}
+
 /* Cast between vector types */
 
 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 35d79a8bff..b5b4ff2391 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -137,3 +137,17 @@ __m512i test_mm512_valign_epi64(__m512i a, __m512i b)
   // CHECK: @llvm.x86.avx512.mask.valign.q.512
   return _mm512_valign_epi64(a, b, 2);
 }
+
+__m512d test_mm512_broadcastsd_pd(__m128d a)
+{
+  // CHECK-LABEL: @test_mm512_broadcastsd_pd
+  // CHECK: insertelement <8 x double> {{.*}}, i32 0
+  // CHECK: insertelement <8 x double> {{.*}}, i32 1
+  // CHECK: insertelement <8 x double> {{.*}}, i32 2
+  // CHECK: insertelement <8 x double> {{.*}}, i32 3
+  // CHECK: insertelement <8 x double> {{.*}}, i32 4
+  // CHECK: insertelement <8 x double> {{.*}}, i32 5
+  // CHECK: insertelement <8 x double> {{.*}}, i32 6
+  // CHECK: insertelement <8 x double> {{.*}}, i32 7
+  return _mm512_broadcastsd_pd(a);
+}
-- 
2.40.0