[AVX512] Add intrinsic for valignd/q
authorAdam Nemet <anemet@apple.com>
Tue, 5 Aug 2014 17:28:23 +0000 (17:28 +0000)
committerAdam Nemet <anemet@apple.com>
Tue, 5 Aug 2014 17:28:23 +0000 (17:28 +0000)
Note that, similar to palignr, we could further optimize these to emit
shufflevector when the shift count is <= 64.  This, however, does not
change the overall design: unlike palignr, we would still need the LLVM
intrinsic corresponding to this instruction to handle the > 64 cases.
(palignr uses the psrldq intrinsic in this case.)  A scalar sketch of
the semantics follows below.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@214891 91177308-0d34-0410-b5e6-96231b3b80d8
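
For context, a minimal scalar model of what 512-bit valignq computes (my own
illustration, not part of the commit; the function name and the "count mod 8"
treatment of the immediate are assumptions): the instruction concatenates
src1:src2 into a 16-qword temporary, shifts it right by the immediate count in
whole elements, and keeps the low 8 elements.

#include <stdint.h>

/* Hypothetical reference model for 512-bit valignq (illustration only). */
static void valignq512_ref(const int64_t a[8],  /* src1: high half of concat */
                           const int64_t b[8],  /* src2: low half of concat  */
                           unsigned imm, int64_t dst[8])
{
  int64_t concat[16];
  for (int i = 0; i < 8; ++i) {
    concat[i]     = b[i];   /* low 8 qwords come from src2  */
    concat[i + 8] = a[i];   /* high 8 qwords come from src1 */
  }
  /* Shift right by whole qword elements and keep the low 8.  The "& 7"
     reflects the assumption that only the low immediate bits matter for
     an 8-element vector. */
  for (int i = 0; i < 8; ++i)
    dst[i] = concat[i + (imm & 7)];
}

Since the element indices are compile-time constants, a small shift count is
expressible as a plain two-source shuffle, which is why the shufflevector
lowering mentioned above is attractive.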

include/clang/Basic/BuiltinsX86.def
lib/Headers/avx512fintrin.h
test/CodeGen/avx512f-builtins.c

index d546989738bc62319b37534db9c9e45aaf5ded5e..117232cfc4a72c853a50b866a807329d18ef8a62 100644 (file)
@@ -838,6 +838,8 @@ BUILTIN(__builtin_ia32_vpermt2vard512_mask, "V16iV16iV16iV16iUs", "")
 BUILTIN(__builtin_ia32_vpermt2varq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "")
 BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "")
 BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "")
+BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiUcV8LLiUc", "")
+BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iUcV16iUc", "")
 BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8dv*V8iUcCi", "")
 BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16fv*UsCi", "")
 BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8dv*V8LLiUcCi", "")
index c205662c164e26a3025804dc154983efa100b18d..ad92fe7b64b99529b1e817022afb1db47bdfb33d 100644 (file)
@@ -529,6 +529,26 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
                                                        (__mmask16) -1);
 }
 
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi64(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i) __builtin_ia32_alignq512_mask((__v8di)__A,
+                                                 (__v8di)__B,
+                                                 __I,
+                                                 (__v8di)_mm512_setzero_si512(),
+                                                 (__mmask8) -1);
+}
+
+static __inline __m512i __attribute__ ((__always_inline__, __nodebug__))
+_mm512_valign_epi32(__m512i __A, __m512i __B, const int __I)
+{
+  return (__m512i)__builtin_ia32_alignd512_mask((__v16si)__A,
+                                                (__v16si)__B,
+                                                __I,
+                                                (__v16si)_mm512_setzero_si512(),
+                                                (__mmask16) -1);
+}
+
 /* Vector Blend */
 
 static __inline __m512d __attribute__ ((__always_inline__, __nodebug__))
index 4c4a064ff5d056f9719e2e35b3c08f1de3d2e4fb..35d79a8bff126564be79de47aa378c0bcedff9d5 100644 (file)
@@ -130,3 +130,10 @@ __mmask16 test_mm512_knot(__mmask16 a)
   // CHECK: @llvm.x86.avx512.knot.w
   return _mm512_knot(a);
 }
+
+__m512i test_mm512_valign_epi64(__m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_valign_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  return _mm512_valign_epi64(a, b, 2);
+}
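
A hedged usage sketch of the new intrinsic (not part of the commit): it
assumes a full AVX-512 toolchain that also provides _mm512_set_epi64 and
_mm512_storeu_si512, which this header does not yet define at this revision.

#include <immintrin.h>
#include <stdio.h>

int main(void)
{
  /* _mm512_set_epi64 takes elements highest-first, so a[0..7] = 0..7
     and b[0..7] = 10..17. */
  __m512i a = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
  __m512i b = _mm512_set_epi64(17, 16, 15, 14, 13, 12, 11, 10);
  long long out[8];

  /* Concatenate a:b (a high, b low), shift right by 2 qwords, and keep
     the low 8 elements: expect 12 13 14 15 16 17 0 1. */
  _mm512_storeu_si512((void *)out, _mm512_valign_epi64(a, b, 2));

  for (int i = 0; i < 8; ++i)
    printf("%lld ", out[i]);
  printf("\n");
  return 0;
}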