From 8467662e1a795229f7c903a91f0eab27a303feb0 Mon Sep 17 00:00:00 2001
From: Adam Nemet
Date: Tue, 5 Aug 2014 17:28:23 +0000
Subject: [PATCH] [AVX512] Add intrinsic for valignd/q

Note that, similar to palignr, we could further optimize these to emit a
shufflevector when the shift count is <=64.  This, however, does not change
the overall design: unlike palignr, we would still need the LLVM intrinsic
corresponding to this instruction to handle the >64 cases.  (palignr uses
the psrldq intrinsic in that case.)

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@214891 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/clang/Basic/BuiltinsX86.def |  2 ++
 lib/Headers/avx512fintrin.h         | 20 ++++++++++++++++++++
 test/CodeGen/avx512f-builtins.c     |  7 +++++++
 3 files changed, 29 insertions(+)

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index d546989738..117232cfc4 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -838,6 +838,8 @@ BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "")
 BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
 BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "")
 BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "")
+BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiUcV8LLiUc", "")
+BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iUcV16iUs", "")
 BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dv*V8iUcCi", "")
 BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fv*UsCi", "")
 BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dv*V8LLiUcCi", "")
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index c205662c16..ad92fe7b64 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -529,6 +529,26 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
                                                         (__mmask16) -1);
 }
 
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
+                                                 (__v8di)__B,
+                                                 __I,
+                                                 (__v8di)_mm512_setzero_si512(),
+                                                 (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
+                                                (__v16si)__B,
+                                                __I,
+                                                (__v16si)_mm512_setzero_si512(),
+                                                (__mmask16) -1);
+}
+
 /* Vector Blend */
 
 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 4c4a064ff5..35d79a8bff 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -130,3 +130,10 @@ __mmask16 test_mm512_knot(__mmask16 a)
   // CHECK: @llvm.x86.avx512.knot.w
   return _mm512_knot(a);
 }
+
+__m512i test_mm512_valign_epi64(__m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_valign_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  return _mm512_valign_epi64(a, b, 2);
+}
-- 
2.40.0
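
For reference, below is a hedged usage sketch of the new unmasked intrinsic; it is not part of the patch.  It assumes a clang build with this patch applied, -mavx512f, and an AVX-512F CPU, and it only relies on the __m512i type and the _mm512_valign_epi64 intrinsic added above (later headers expose the same operation under the name _mm512_alignr_epi64).  valignq concatenates the first source (high half) with the second source (low half) into 16 quadwords and shifts the whole thing right by the immediate, so with an immediate of 2 the result is b[2..7] followed by a[0..1].

```c
/* Usage sketch for the intrinsic added by this patch (hypothetical example,
 * not part of the change).  Builds the inputs with plain aligned arrays so
 * that only _mm512_valign_epi64 itself is required from the header. */
#include <immintrin.h>
#include <stdio.h>

int main(void) {
  _Alignas(64) long long a_arr[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  _Alignas(64) long long b_arr[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  _Alignas(64) long long out[8];

  __m512i a = *(const __m512i *)a_arr;
  __m512i b = *(const __m512i *)b_arr;

  /* Shift the 16-quadword concatenation a:b right by two 64-bit elements
   * and keep the low eight elements. */
  *(__m512i *)out = _mm512_valign_epi64(a, b, 2);

  for (int i = 0; i < 8; ++i)
    printf("%lld ", out[i]);   /* expected: 2 3 4 5 6 7 10 11 */
  printf("\n");
  return 0;
}
```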