granicus.if.org Git - clang/commitdiff
[AVX512] Add intrinsics for FP scalar broadcasts
author Adam Nemet <anemet@apple.com>
Wed, 13 Aug 2014 00:29:01 +0000 (00:29 +0000)
committer Adam Nemet <anemet@apple.com>
Wed, 13 Aug 2014 00:29:01 +0000 (00:29 +0000)
A similar approach to the set1 intrinsics is used: the broadcasts are implemented
in terms of vector initializers, and an LLVM test then ensures that a broadcast
is generated at the end.

Part of <rdar://problem/17688758>

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@215486 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Headers/avx512fintrin.h
test/CodeGen/avx512f-builtins.c

index ad92fe7b64b99529b1e817022afb1db47bdfb33d..1b558a0404aed444bee77e9cc036721e5bc985a5 100644 (file)
@@ -117,6 +117,24 @@ _mm512_set1_epi64(long long __d)
   return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
+static __inline__ __m512 __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastss_ps(__m128 __X) /* Returns a vector whose 16 float elements all equal __X[0]. */
+{
+  float __f = __X[0]; /* only element 0 of __X is read; the other elements are ignored */
+  return (__v16sf){ __f, __f, __f, __f, /* plain 16-element vector initializer with the same scalar in every position */
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f,
+                    __f, __f, __f, __f };
+}
+
+static __inline__ __m512d __attribute__((__always_inline__, __nodebug__))
+_mm512_broadcastsd_pd(__m128d __X) /* Returns a vector whose 8 double elements all equal __X[0]. */
+{
+  double __d = __X[0]; /* only element 0 of __X is read; the other element is ignored */
+  return (__v8df){ __d, __d, __d, __d, /* plain 8-element vector initializer with the same scalar in every position */
+                   __d, __d, __d, __d };
+}
+
 /* Cast between vector types */
 
 static __inline __m512d __attribute__((__always_inline__, __nodebug__))
index 35d79a8bff126564be79de47aa378c0bcedff9d5..b5b4ff23916452fba6ab4f8da408ddb88528e82d 100644 (file)
@@ -137,3 +137,17 @@ __m512i test_mm512_valign_epi64(__m512i a, __m512i b)
   // CHECK: @llvm.x86.avx512.mask.valign.q.512
   return _mm512_valign_epi64(a, b, 2);
 }
+
+__m512d test_mm512_broadcastsd_pd(__m128d a) // Calls _mm512_broadcastsd_pd; the directives below expect one <8 x double> insertelement for each index 0 through 7, in order.
+{
+  // CHECK-LABEL: @test_mm512_broadcastsd_pd
+  // CHECK: insertelement <8 x double> {{.*}}, i32 0
+  // CHECK: insertelement <8 x double> {{.*}}, i32 1
+  // CHECK: insertelement <8 x double> {{.*}}, i32 2
+  // CHECK: insertelement <8 x double> {{.*}}, i32 3
+  // CHECK: insertelement <8 x double> {{.*}}, i32 4
+  // CHECK: insertelement <8 x double> {{.*}}, i32 5
+  // CHECK: insertelement <8 x double> {{.*}}, i32 6
+  // CHECK: insertelement <8 x double> {{.*}}, i32 7
+  return _mm512_broadcastsd_pd(a);
+}