]> granicus.if.org Git - clang/commitdiff
[Clang][BuiltIn][AVX512] Adding intrinsics for align{d|q} and palignr instruction set
authorMichael Zuckerman <Michael.zuckerman@intel.com>
Thu, 28 Apr 2016 12:47:30 +0000 (12:47 +0000)
committerMichael Zuckerman <Michael.zuckerman@intel.com>
Thu, 28 Apr 2016 12:47:30 +0000 (12:47 +0000)
Differential Revision: http://reviews.llvm.org/D19588

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@267876 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/BuiltinsX86.def
lib/Headers/avx512bwintrin.h
lib/Headers/avx512fintrin.h
lib/Headers/avx512vlbwintrin.h
lib/Headers/avx512vlintrin.h
test/CodeGen/avx512bw-builtins.c
test/CodeGen/avx512f-builtins.c
test/CodeGen/avx512vl-builtins.c
test/CodeGen/avx512vlbw-builtins.c

index 8020f06edc3b614a29ffe23fccbe741024807229..4a58c91dcd1f8a62828f6cb2f8272a6116599b2f 100644 (file)
@@ -1058,6 +1058,10 @@ TARGET_BUILTIN(__builtin_ia32_vpermt2varps512_mask, "V16fV16iV16fV16fUs", "", "a
 TARGET_BUILTIN(__builtin_ia32_vpermt2varpd512_mask, "V8dV8LLiV8dV8dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_alignd512_mask, "V16iV16iV16iIiV16iUs", "", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc", "", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc", "", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f")
 
@@ -2207,6 +2211,10 @@ TARGET_BUILTIN(__builtin_ia32_movntdq512, "vV8LLi*V8LLi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movntdqa512, "V8LLiV8LLi*","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movntpd512, "vd*V8d","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movntps512, "vf*V16f","","avx512f")
+// Masked palignr builtins. The immediate is declared "Ii" (must be an integer
+// constant expression), matching the align{d,q} mask builtins in this file.
+TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64cIiV64cULLi", "", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_palignr128_mask, "V16cV16cV16cIiV16cUs", "", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_palignr256_mask, "V32cV32cV32cIiV32cUi", "", "avx512bw,avx512vl")
+
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
index e0307cdcf8678dd2ff706c2ced47389e2a39e679..4f451df3f869420f4fdf130b8b9d645733c7f5ad 100644 (file)
@@ -2168,6 +2168,29 @@ _mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
                  (__mmask32) __M);
 }
 
+/* Unmasked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr512_mask with an all-ones mask and an undefined
+ * fourth operand; the result is cast to __m512i. Every macro argument is
+ * parenthesized so compound expressions expand as a unit.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm512_alignr_epi8(__A, __B, __N) __extension__ ({ \
+  (__m512i)__builtin_ia32_palignr512_mask((__v8di)(__A), \
+                                          (__v8di)(__B), (__N) * 8, \
+                                          (__v8di)_mm512_undefined_pd(), \
+                                          (__mmask64)-1); \
+})
+
+/* Masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr512_mask, with __W as the fourth operand and __U as
+ * the mask; the result is cast to __m512i. Every macro argument is
+ * parenthesized so compound expressions expand as a unit.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm512_mask_alignr_epi8(__W, __U, __A, __B, __N) __extension__ ({ \
+  (__m512i)__builtin_ia32_palignr512_mask((__v8di)(__A), \
+                                          (__v8di)(__B), \
+                                          (__N) * 8, \
+                                          (__v8di)(__W), \
+                                          (__mmask64)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr512_mask, with _mm512_setzero_si512() as the fourth
+ * operand and __U as the mask; the result is cast to __m512i. Every macro
+ * argument is parenthesized so compound expressions expand as a unit.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm512_maskz_alignr_epi8(__U, __A, __B, __N) __extension__ ({ \
+  (__m512i)__builtin_ia32_palignr512_mask((__v8di)(__A), \
+                                          (__v8di)(__B), \
+                                          (__N) * 8, \
+                                          (__v8di)_mm512_setzero_si512(), \
+                                          (__mmask64)(__U)); \
+})
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif
index 38d2ccb52af3e27d80a329ea2d9d8d7c143715f8..844025836a4884b125d929bc9e81d1978d533157 100644 (file)
@@ -2550,12 +2550,40 @@ _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
                                          (I), (__v8di)_mm512_setzero_si512(), \
                                          (__mmask8)-1); })
 
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignq512_mask,
+ * with __W as the fourth operand and __U as the mask. Every macro argument
+ * is parenthesized so compound expressions expand as a unit. */
+#define _mm512_mask_alignr_epi64(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__A), \
+                                         (__v8di)(__B), (__imm), \
+                                         (__v8di)(__W), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignq512_mask, with _mm512_setzero_si512() as the fourth
+ * operand and __U as the mask. Every macro argument is parenthesized so
+ * compound expressions expand as a unit. */
+#define _mm512_maskz_alignr_epi64(__U, __A, __B, __imm) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__A), \
+                                         (__v8di)(__B), (__imm), \
+                                         (__v8di)_mm512_setzero_si512(), \
+                                         (__mmask8)(__U)); \
+})
+
 #define _mm512_alignr_epi32(A, B, I) __extension__ ({ \
-  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
+    (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \
                                          (__v16si)(__m512i)(B), \
                                          (I), (__v16si)_mm512_setzero_si512(), \
-                                         (__mmask16)-1); })
+                                         (__mmask16)-1);\
+})
+                                         
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignd512_mask,
+ * with __W as the fourth operand and __U as the mask. Every macro argument
+ * is parenthesized so compound expressions expand as a unit. */
+#define _mm512_mask_alignr_epi32(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__A), \
+                                         (__v16si)(__B), (__imm), \
+                                         (__v16si)(__W), \
+                                         (__mmask16)(__U)); \
+})
 
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignd512_mask, with _mm512_setzero_si512() as the fourth
+ * operand and __U as the mask. Every macro argument is parenthesized so
+ * compound expressions expand as a unit. */
+#define _mm512_maskz_alignr_epi32(__U, __A, __B, __imm) __extension__ ({ \
+  (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__A), \
+                                         (__v16si)(__B), (__imm), \
+                                         (__v16si)_mm512_setzero_si512(), \
+                                         (__mmask16)(__U)); \
+})
 /* Vector Extract */
 
 #define _mm512_extractf64x4_pd(A, I) __extension__ ({                    \
index 361df9391163f5cebb73c2b68b8407b989ade5aa..bee20aa183e729e94df06fb5bded64f8b43b59f3 100644 (file)
@@ -3358,6 +3358,40 @@ _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
                  (__mmask16) __M);
 }
 
+/* Masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr128_mask, with __W as the fourth operand and __U as
+ * the mask. The result is cast to __m128i so the macro yields the public
+ * vector type rather than the builtin's raw return type.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm_mask_alignr_epi8(__W, __U, __A, __B, __N) __extension__ ({ \
+  (__m128i)__builtin_ia32_palignr128_mask((__v2di)(__A), \
+                                          (__v2di)(__B), \
+                                          (__N) * 8, \
+                                          (__v2di)(__W), \
+                                          (__mmask16)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr128_mask, with _mm_setzero_si128() as the fourth
+ * operand and __U as the mask. The result is cast to __m128i so the macro
+ * yields the public vector type rather than the builtin's raw return type.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm_maskz_alignr_epi8(__U, __A, __B, __N) __extension__ ({ \
+  (__m128i)__builtin_ia32_palignr128_mask((__v2di)(__A), \
+                                          (__v2di)(__B), \
+                                          (__N) * 8, \
+                                          (__v2di)_mm_setzero_si128(), \
+                                          (__mmask16)(__U)); \
+})
+
+/* Masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr256_mask, with __W as the fourth operand and __U as
+ * the mask. The result is cast to __m256i so the macro yields the public
+ * vector type rather than the builtin's raw return type.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm256_mask_alignr_epi8(__W, __U, __A, __B, __N) __extension__ ({ \
+  (__m256i)__builtin_ia32_palignr256_mask((__v4di)(__A), \
+                                          (__v4di)(__B), \
+                                          (__N) * 8, \
+                                          (__v4di)(__W), \
+                                          (__mmask32)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and the immediate (__N) * 8 to
+ * __builtin_ia32_palignr256_mask, with _mm256_setzero_si256() as the fourth
+ * operand and __U as the mask. The result is cast to __m256i so the macro
+ * yields the public vector type rather than the builtin's raw return type.
+ * NOTE(review): the "* 8" scaling is preserved as-is; confirm the unit the
+ * builtin expects for its immediate. */
+#define _mm256_maskz_alignr_epi8(__U, __A, __B, __N) __extension__ ({ \
+  (__m256i)__builtin_ia32_palignr256_mask((__v4di)(__A), \
+                                          (__v4di)(__B), \
+                                          (__N) * 8, \
+                                          (__v4di)_mm256_setzero_si256(), \
+                                          (__mmask32)(__U)); \
+})
+
 #undef __DEFAULT_FN_ATTRS
 
 #endif /* __AVX512VLBWINTRIN_H */
index 77d98b887ff573ebbe88e80b5ff282684340f231..60c2fbec8ec3c7c81c118702ccf6ca630544ff48 100644 (file)
@@ -9209,6 +9209,90 @@ _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
                  (__mmask8) -1);
 }
 
+/* Unmasked form: forwards __A, __B and __imm to __builtin_ia32_alignd128_mask
+ * with an all-ones mask and an undefined fourth operand. The result is cast
+ * to __m128i so the macro yields the public vector type. */
+#define _mm_alignr_epi32(__A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__A), \
+                                         (__v4si)(__B), (__imm), \
+                                         (__v4si)_mm_undefined_si128(), \
+                                         (__mmask8)-1); \
+})
+
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignd128_mask,
+ * with __W as the fourth operand and __U as the mask. The result is cast to
+ * __m128i so the macro yields the public vector type. */
+#define _mm_mask_alignr_epi32(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__A), \
+                                         (__v4si)(__B), (__imm), \
+                                         (__v4si)(__W), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignd128_mask, with _mm_setzero_si128() as the fourth
+ * operand and __U as the mask. The result is cast to __m128i so the macro
+ * yields the public vector type. */
+#define _mm_maskz_alignr_epi32(__U, __A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignd128_mask((__v4si)(__A), \
+                                         (__v4si)(__B), (__imm), \
+                                         (__v4si)_mm_setzero_si128(), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Unmasked form: forwards __A, __B and __imm to __builtin_ia32_alignd256_mask
+ * with an all-ones mask and an undefined fourth operand. The result is cast
+ * to __m256i so the macro yields the public vector type. */
+#define _mm256_alignr_epi32(__A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__A), \
+                                         (__v8si)(__B), (__imm), \
+                                         (__v8si)_mm256_undefined_si256(), \
+                                         (__mmask8)-1); \
+})
+
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignd256_mask,
+ * with __W as the fourth operand and __U as the mask. The result is cast to
+ * __m256i so the macro yields the public vector type. */
+#define _mm256_mask_alignr_epi32(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__A), \
+                                         (__v8si)(__B), (__imm), \
+                                         (__v8si)(__W), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignd256_mask, with _mm256_setzero_si256() as the fourth
+ * operand and __U as the mask. The result is cast to __m256i so the macro
+ * yields the public vector type. */
+#define _mm256_maskz_alignr_epi32(__U, __A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignd256_mask((__v8si)(__A), \
+                                         (__v8si)(__B), (__imm), \
+                                         (__v8si)_mm256_setzero_si256(), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Unmasked form: forwards __A, __B and __imm to __builtin_ia32_alignq128_mask
+ * with an all-ones mask. The fourth operand is _mm_undefined_si128(), as in
+ * the 32-bit-element sibling _mm_alignr_epi32, instead of a zeroed vector.
+ * The result is cast to __m128i so the macro yields the public vector
+ * type. */
+#define _mm_alignr_epi64(__A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__A), \
+                                         (__v2di)(__B), (__imm), \
+                                         (__v2di)_mm_undefined_si128(), \
+                                         (__mmask8)-1); \
+})
+
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignq128_mask,
+ * with __W as the fourth operand and __U as the mask. The result is cast to
+ * __m128i so the macro yields the public vector type. */
+#define _mm_mask_alignr_epi64(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__A), \
+                                         (__v2di)(__B), (__imm), \
+                                         (__v2di)(__W), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignq128_mask, with _mm_setzero_si128() as the fourth
+ * operand (the same zero helper the 32-bit-element sibling uses) and __U as
+ * the mask. The result is cast to __m128i so the macro yields the public
+ * vector type. */
+#define _mm_maskz_alignr_epi64(__U, __A, __B, __imm) __extension__ ({ \
+  (__m128i)__builtin_ia32_alignq128_mask((__v2di)(__A), \
+                                         (__v2di)(__B), (__imm), \
+                                         (__v2di)_mm_setzero_si128(), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Unmasked form: forwards __A, __B and __imm to __builtin_ia32_alignq256_mask
+ * with an all-ones mask. The fourth operand is _mm256_undefined_si256(), as
+ * in the 32-bit-element sibling _mm256_alignr_epi32, instead of the
+ * floating-point _mm256_undefined_pd(). The result is cast to __m256i so the
+ * macro yields the public vector type. */
+#define _mm256_alignr_epi64(__A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__A), \
+                                         (__v4di)(__B), (__imm), \
+                                         (__v4di)_mm256_undefined_si256(), \
+                                         (__mmask8)-1); \
+})
+
+/* Masked form: forwards __A, __B and __imm to __builtin_ia32_alignq256_mask,
+ * with __W as the fourth operand and __U as the mask. The result is cast to
+ * __m256i so the macro yields the public vector type. */
+#define _mm256_mask_alignr_epi64(__W, __U, __A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__A), \
+                                         (__v4di)(__B), (__imm), \
+                                         (__v4di)(__W), \
+                                         (__mmask8)(__U)); \
+})
+
+/* Zero-operand masked form: forwards __A, __B and __imm to
+ * __builtin_ia32_alignq256_mask, with _mm256_setzero_si256() as the fourth
+ * operand and __U as the mask. The result is cast to __m256i so the macro
+ * yields the public vector type. */
+#define _mm256_maskz_alignr_epi64(__U, __A, __B, __imm) __extension__ ({ \
+  (__m256i)__builtin_ia32_alignq256_mask((__v4di)(__A), \
+                                         (__v4di)(__B), (__imm), \
+                                         (__v4di)_mm256_setzero_si256(), \
+                                         (__mmask8)(__U)); \
+})
+
 #undef __DEFAULT_FN_ATTRS
 #undef __DEFAULT_FN_ATTRS_BOTH
 
index 3023e6060044919f78c8c3ef1612686cab1e6e54..8925cbadc064f69e201cf7127a19a46a0aa30d76 100644 (file)
@@ -1487,3 +1487,23 @@ __m512i test_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __
   // CHECK: @llvm.x86.avx512.mask.permvar.hi.512
   return _mm512_mask_permutexvar_epi16(__W, __M, __A, __B); 
 }
+// Exercises the unmasked 512-bit byte alignr macro with an immediate of 2.
+__m512i test_mm512_alignr_epi8(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  __m512i res = _mm512_alignr_epi8(__A, __B, 2);
+  return res;
+}
+
+// Exercises the masked 512-bit byte alignr macro with an immediate of 2.
+__m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  __m512i res = _mm512_mask_alignr_epi8(__W, __U, __A, __B, 2);
+  return res;
+}
+
+// Exercises the maskz 512-bit byte alignr macro with an immediate of 2.
+__m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.512
+  __m512i res = _mm512_maskz_alignr_epi8(__U, __A, __B, 2);
+  return res;
+}
+
+
+
index da3bf07668cca4a5d519b94c1ab02a4213718f95..1f048d36b158be8459e6b4e9ed5f46ae83b2957d 100644 (file)
@@ -180,6 +180,20 @@ __m512i test_mm512_alignr_epi32(__m512i a, __m512i b)
   return _mm512_alignr_epi32(a, b, 2);
 }
 
+// Exercises the masked 512-bit dword alignr macro with an immediate of 2.
+__m512i test_mm512_mask_alignr_epi32(__m512i w, __mmask16 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  __m512i res = _mm512_mask_alignr_epi32(w, u, a, b, 2);
+  return res;
+}
+
+// Exercises the maskz 512-bit dword alignr macro with an immediate of 2.
+__m512i test_mm512_maskz_alignr_epi32(__mmask16 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.512
+  __m512i res = _mm512_maskz_alignr_epi32(u, a, b, 2);
+  return res;
+}
+
 __m512i test_mm512_alignr_epi64(__m512i a, __m512i b)
 {
   // CHECK-LABEL: @test_mm512_alignr_epi64
@@ -187,6 +201,20 @@ __m512i test_mm512_alignr_epi64(__m512i a, __m512i b)
   return _mm512_alignr_epi64(a, b, 2);
 }
 
+// Exercises the masked 512-bit qword alignr macro with an immediate of 2.
+__m512i test_mm512_mask_alignr_epi64(__m512i w, __mmask8 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  __m512i res = _mm512_mask_alignr_epi64(w, u, a, b, 2);
+  return res;
+}
+
+// Exercises the maskz 512-bit qword alignr macro with an immediate of 2.
+__m512i test_mm512_maskz_alignr_epi64(__mmask8 u, __m512i a, __m512i b)
+{
+  // CHECK-LABEL: @test_mm512_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.512
+  __m512i res = _mm512_maskz_alignr_epi64(u, a, b, 2);
+  return res;
+}
+
 __m512d test_mm512_broadcastsd_pd(__m128d a)
 {
   // CHECK-LABEL: @test_mm512_broadcastsd_pd
index 9ba949e72963d6e4d11648e2c387afe77887bb69..aea65bcb3ededcd1817f6214f8384be020db68c2 100644 (file)
@@ -6461,3 +6461,75 @@ __m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X
   // CHECK: @llvm.x86.avx512.mask.permvar.si.256
   return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); 
 }
+
+// Exercises the unmasked 128-bit dword alignr macro with an immediate of 1.
+__m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  __m128i res = _mm_alignr_epi32(__A, __B, 1);
+  return res;
+}
+
+// Exercises the masked 128-bit dword alignr macro with an immediate of 1.
+__m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  __m128i res = _mm_mask_alignr_epi32(__W, __U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the maskz 128-bit dword alignr macro with an immediate of 1.
+__m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.128
+  __m128i res = _mm_maskz_alignr_epi32(__U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the unmasked 256-bit dword alignr macro with an immediate of 1.
+__m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  __m256i res = _mm256_alignr_epi32(__A, __B, 1);
+  return res;
+}
+
+// Exercises the masked 256-bit dword alignr macro with an immediate of 1.
+__m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  __m256i res = _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the maskz 256-bit dword alignr macro with an immediate of 1.
+__m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi32
+  // CHECK: @llvm.x86.avx512.mask.valign.d.256
+  __m256i res = _mm256_maskz_alignr_epi32(__U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the unmasked 128-bit qword alignr macro with an immediate of 1.
+__m128i test_mm_alignr_epi64(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  __m128i res = _mm_alignr_epi64(__A, __B, 1);
+  return res;
+}
+
+// Exercises the masked 128-bit qword alignr macro with an immediate of 1.
+__m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  __m128i res = _mm_mask_alignr_epi64(__W, __U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the maskz 128-bit qword alignr macro with an immediate of 1.
+__m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.128
+  __m128i res = _mm_maskz_alignr_epi64(__U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the unmasked 256-bit qword alignr macro with an immediate of 1.
+__m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  __m256i res = _mm256_alignr_epi64(__A, __B, 1);
+  return res;
+}
+
+// Exercises the masked 256-bit qword alignr macro with an immediate of 1.
+__m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  __m256i res = _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1);
+  return res;
+}
+
+// Exercises the maskz 256-bit qword alignr macro with an immediate of 1.
+__m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi64
+  // CHECK: @llvm.x86.avx512.mask.valign.q.256
+  __m256i res = _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
+  return res;
+}
index f05b32d2fe6f0740888b1cace6fe65772fa1b0dc..f72363d8e9ca514335aa3e9a875fefd6116838a2 100644 (file)
@@ -2316,3 +2316,27 @@ __m256i test_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __
   // CHECK: @llvm.x86.avx512.mask.permvar.hi.256
   return _mm256_mask_permutexvar_epi16(__W, __M, __A, __B); 
 }
+// Exercises the masked 128-bit byte alignr macro with an immediate of 2.
+__m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  __m128i res = _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
+  return res;
+}
+
+// Exercises the maskz 128-bit byte alignr macro with an immediate of 2.
+__m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  __m128i res = _mm_maskz_alignr_epi8(__U, __A, __B, 2);
+  return res;
+}
+
+// Exercises the masked 256-bit byte alignr macro with an immediate of 2.
+__m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  __m256i res = _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
+  return res;
+}
+
+// Exercises the maskz 256-bit byte alignr macro with an immediate of 2.
+__m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  __m256i res = _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
+  return res;
+}
+