From 85631f9bbcd4b92ffc760b3d768b5826e8093a65 Mon Sep 17 00:00:00 2001 From: Adam Nemet Date: Wed, 14 Jan 2015 01:31:17 +0000 Subject: [PATCH] [AVX512] Add FP unpack intrinsics These are implemented with __builtin_shufflevector just like AVX. We have some tests on the LLVM side to assert that these shufflevectors do indeed generate the corresponding unpck instruction. Part of git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@225922 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/avx512fintrin.h | 33 +++++++++++++++++++++++++++++++++ test/CodeGen/avx512f-builtins.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 9591dcf37a..9c80710110 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -839,6 +839,39 @@ _mm512_cvt_roundpd_epu32(__m512d __A, const int __R) __R); } +/* Unpack and Interleave: alternate elements of __a and __b (shuffle indices 8+ for pd, 16+ for ps select from __b); the hi/lo index pattern repeats every 2 doubles / 4 floats, i.e. once per 128-bit group. */ +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); +} + +static __inline __m512d __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_pd(__m512d __a, __m512d __b) +{ + return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpackhi_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 2, 18, 3, 19, + 2+4, 18+4, 3+4, 19+4, + 2+8, 18+8, 3+8, 19+8, + 2+12, 18+12, 3+12, 19+12); +} + +static __inline __m512 __attribute__((__always_inline__, __nodebug__)) +_mm512_unpacklo_ps(__m512 __a, __m512 __b) +{ + return __builtin_shufflevector(__a, __b, + 0, 16, 1, 17, + 0+4, 16+4, 1+4, 17+4, + 0+8, 16+8, 1+8, 17+8, + 0+12, 16+12, 1+12, 17+12); +} + /* Bit Test */ static __inline __mmask16 __attribute__ ((__always_inline__, __nodebug__)) diff --git 
a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index ba05f4abdc..8bb013fef7 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -182,3 +182,31 @@ __mmask8 test_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { // CHECK: @llvm.x86.avx512.mask.pcmpeq.q.512 return (__mmask8)_mm512_cmpeq_epi64_mask(__a, __b); } + +__m512d test_mm512_unpackhi_pd(__m512d a, __m512d b) +{ + // CHECK-LABEL: @test_mm512_unpackhi_pd + // CHECK: shufflevector <8 x double> {{.*}} <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> + return _mm512_unpackhi_pd(a, b); +} + +__m512d test_mm512_unpacklo_pd(__m512d a, __m512d b) +{ + // CHECK-LABEL: @test_mm512_unpacklo_pd + // CHECK: shufflevector <8 x double> {{.*}} <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + return _mm512_unpacklo_pd(a, b); +} + +__m512 test_mm512_unpackhi_ps(__m512 a, __m512 b) +{ + // CHECK-LABEL: @test_mm512_unpackhi_ps + // CHECK: shufflevector <16 x float> {{.*}} <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31> + return _mm512_unpackhi_ps(a, b); +} + +__m512 test_mm512_unpacklo_ps(__m512 a, __m512 b) +{ + // CHECK-LABEL: @test_mm512_unpacklo_ps + // CHECK: shufflevector <16 x float> {{.*}} <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29> + return _mm512_unpacklo_ps(a, b); +} -- 2.40.0