From ecdea738b87a789b993eab7d2f91bda5e11924a4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 5 Jul 2016 12:59:33 +0000 Subject: [PATCH] [X86][AVX512] Converted the VBROADCAST intrinsics to generic IR git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274544 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/avx512bwintrin.h | 40 +++++++------- lib/Headers/avx512fintrin.h | 82 ++++++++++++++--------------- lib/Headers/avx512vlbwintrin.h | 48 ++++++++--------- lib/Headers/avx512vlintrin.h | 84 +++++++++++++++--------------- test/CodeGen/avx512bw-builtins.c | 29 ++++++----- test/CodeGen/avx512f-builtins.c | 74 ++++++++++++++------------ test/CodeGen/avx512vl-builtins.c | 70 +++++++++++++++---------- test/CodeGen/avx512vlbw-builtins.c | 42 ++++++++------- 8 files changed, 250 insertions(+), 219 deletions(-) diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index e3caa404fd..d3c5a6c964 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -2266,25 +2266,28 @@ _mm512_movm_epi16 (__mmask32 __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastb_epi8 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) _mm512_setzero_si512(), - (__mmask64) -1); + return (__m512i)__builtin_shufflevector((__v16qi) __A, + (__v16qi)_mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) __O, - __M); + return (__m512i)__builtin_ia32_selectb_512(__M, + (__v64qi) _mm512_broadcastb_epi8(__A), + (__v64qi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, - (__v64qi) _mm512_setzero_qi(), - __M); + return (__m512i)__builtin_ia32_selectb_512(__M, + (__v64qi) _mm512_broadcastb_epi8(__A), + (__v64qi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -2306,25 +2309,26 @@ _mm512_maskz_set1_epi16 (__mmask32 __M, short __A) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastw_epi16 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) _mm512_setzero_si512(), - (__mmask32) -1); + return (__m512i)__builtin_shufflevector((__v8hi) __A, + (__v8hi)_mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) __O, - __M); + return (__m512i)__builtin_ia32_selectw_512(__M, + (__v32hi) _mm512_broadcastw_epi16(__A), + (__v32hi) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, - (__v32hi) _mm512_setzero_hi(), - __M); + return (__m512i)__builtin_ia32_selectw_512(__M, + (__v32hi) _mm512_broadcastw_epi16(__A), + (__v32hi) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 4f95df8192..c85c793bf4 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -195,54 +195,54 @@ _mm512_undefined_epi32(void) { return (__m512i)__builtin_ia32_undef512(); } + static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastd_epi32 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, - (__v16si) - _mm512_undefined_epi32 (), - (__mmask16) -1); + return (__m512i)__builtin_shufflevector((__v4si) __A, + (__v4si)_mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, - (__v16si) __O, __M); + return (__m512i)__builtin_ia32_selectd_512(__M, + (__v16si) _mm512_broadcastd_epi32(__A), + (__v16si) __O); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, - (__v16si) - _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectd_512(__M, + (__v16si) _mm512_broadcastd_epi32(__A), + (__v16si) _mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastq_epi64 (__m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, - (__v8di) - _mm512_undefined_pd (), - (__mmask8) -1); + return (__m512i)__builtin_shufflevector((__v2di) __A, + (__v2di) _mm_undefined_si128(), + 0, 0, 0, 0, 0, 0, 0, 0); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, - (__v8di) __O, __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di) _mm512_broadcastq_epi64(__A), + (__v8di) __O); + } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { - return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, - (__v8di) - _mm512_setzero_si512 (), - __M); + return (__m512i)__builtin_ia32_selectq_512(__M, + (__v8di) _mm512_broadcastq_epi64(__A), + (__v8di) _mm512_setzero_si512()); } static __inline __m512i __DEFAULT_FN_ATTRS @@ -334,13 +334,11 @@ _mm512_set1_epi64(long long __d) } static __inline__ __m512 __DEFAULT_FN_ATTRS -_mm512_broadcastss_ps(__m128 __X) +_mm512_broadcastss_ps(__m128 __A) { - float __f = __X[0]; - return (__v16sf){ __f, __f, __f, __f, - __f, __f, __f, __f, - __f, __f, __f, __f, - __f, __f, __f, __f }; + return (__m512)__builtin_shufflevector((__v4sf) __A, + (__v4sf)_mm_undefined_ps(), + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } static __inline __m512i __DEFAULT_FN_ATTRS @@ -387,11 +385,11 @@ _mm512_set4_ps (float __A, float __B, float __C, float __D) _mm512_set4_ps((e3),(e2),(e1),(e0)) static __inline__ __m512d __DEFAULT_FN_ATTRS -_mm512_broadcastsd_pd(__m128d __X) +_mm512_broadcastsd_pd(__m128d __A) { - double __d = __X[0]; - return (__v8df){ __d, __d, __d, __d, - __d, __d, __d, __d }; + return (__m512d)__builtin_shufflevector((__v2df) __A, + (__v2df) _mm_undefined_pd(), + 0, 0, 0, 0, 0, 0, 0, 0); } /* Cast between vector types */ @@ -7384,33 +7382,33 @@ _mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) { - return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, - (__v8df) __O, __M); + return (__m512d)__builtin_ia32_selectpd_512(__M, + (__v8df) _mm512_broadcastsd_pd(__A), + (__v8df) __O); } static __inline__ __m512d __DEFAULT_FN_ATTRS _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { - return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, - (__v8df) - _mm512_setzero_pd (), - __M); + return (__m512d)__builtin_ia32_selectpd_512(__M, + (__v8df) _mm512_broadcastsd_pd(__A), + (__v8df) _mm512_setzero_pd()); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, - (__v16sf) __O, __M); + return (__m512)__builtin_ia32_selectps_512(__M, + (__v16sf) _mm512_broadcastss_ps(__A), + (__v16sf) __O); } static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) { - return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, - (__v16sf) - _mm512_setzero_ps (), - __M); + return (__m512)__builtin_ia32_selectps_512(__M, + (__v16sf) _mm512_broadcastss_ps(__A), + (__v16sf) _mm512_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512vlbwintrin.h b/lib/Headers/avx512vlbwintrin.h index ad8b8c1406..990e992a11 100644 --- a/lib/Headers/avx512vlbwintrin.h +++ b/lib/Headers/avx512vlbwintrin.h @@ -3191,65 +3191,65 @@ _mm256_movm_epi16 (__mmask16 __A) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A, - (__v16qi) __O, - __M); + return (__m128i)__builtin_ia32_selectb_128(__M, + (__v16qi) _mm_broadcastb_epi8(__A), + (__v16qi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A, - (__v16qi) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectb_128(__M, + (__v16qi) _mm_broadcastb_epi8(__A), + (__v16qi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A, - (__v32qi) __O, - __M); + return (__m256i)__builtin_ia32_selectb_256(__M, + (__v32qi) _mm256_broadcastb_epi8(__A), + (__v32qi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A, - (__v32qi) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectb_256(__M, + (__v32qi) _mm256_broadcastb_epi8(__A), + (__v32qi) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A, - (__v8hi) __O, - __M); + return (__m128i)__builtin_ia32_selectw_128(__M, + (__v8hi) _mm_broadcastw_epi16(__A), + (__v8hi) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A, - (__v8hi) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectw_128(__M, + (__v8hi) _mm_broadcastw_epi16(__A), + (__v8hi) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A, - (__v16hi) __O, - __M); + return (__m256i)__builtin_ia32_selectw_256(__M, + (__v16hi) _mm256_broadcastw_epi16(__A), + (__v16hi) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A, - (__v16hi) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectw_256(__M, + (__v16hi) _mm256_broadcastw_epi16(__A), + (__v16hi) _mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 8e6a8a89f6..4cb61675e1 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -7570,113 +7570,113 @@ _mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A) static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) { - return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A, - (__v4df) __O, - __M); + return (__m256d)__builtin_ia32_selectpd_256(__M, + (__v4df) _mm256_broadcastsd_pd(__A), + (__v4df) __O); } static __inline__ __m256d __DEFAULT_FN_ATTRS _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { - return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A, - (__v4df) _mm256_setzero_pd (), - __M); + return (__m256d)__builtin_ia32_selectpd_256(__M, + (__v4df) _mm256_broadcastsd_pd(__A), + (__v4df) _mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) { - return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A, - (__v4sf) __O, - __M); + return (__m128)__builtin_ia32_selectps_128(__M, + (__v4sf) _mm_broadcastss_ps(__A), + (__v4sf) __O); } static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { - return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A, - (__v4sf) _mm_setzero_ps (), - __M); + return (__m128)__builtin_ia32_selectps_128(__M, + (__v4sf) _mm_broadcastss_ps(__A), + (__v4sf) _mm_setzero_ps()); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A, - (__v8sf) __O, - __M); + return (__m256)__builtin_ia32_selectps_256(__M, + (__v8sf) _mm256_broadcastss_ps(__A), + (__v8sf) __O); } static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { - return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A, - (__v8sf) _mm256_setzero_ps (), - __M); + return (__m256)__builtin_ia32_selectps_256(__M, + (__v8sf) _mm256_broadcastss_ps(__A), + (__v8sf) _mm256_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, - (__v4si) __O, - __M); + return (__m128i)__builtin_ia32_selectd_128(__M, + (__v4si) _mm_broadcastd_epi32(__A), + (__v4si) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, - (__v4si) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectd_128(__M, + (__v4si) _mm_broadcastd_epi32(__A), + (__v4si) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, - (__v8si) __O, - __M); + return (__m256i)__builtin_ia32_selectd_256(__M, + (__v8si) _mm256_broadcastd_epi32(__A), + (__v8si) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, - (__v8si) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectd_256(__M, + (__v8si) _mm256_broadcastd_epi32(__A), + (__v8si) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, - (__v2di) __O, - __M); + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di) _mm_broadcastq_epi64(__A), + (__v2di) __O); } static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { - return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, - (__v2di) _mm_setzero_si128 (), - __M); + return (__m128i)__builtin_ia32_selectq_128(__M, + (__v2di) _mm_broadcastq_epi64(__A), + (__v2di) _mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, - (__v4di) __O, - __M); + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di) _mm256_broadcastq_epi64(__A), + (__v4di) __O); } static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { - return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, - (__v4di) _mm256_setzero_si256 (), - __M); + return (__m256i)__builtin_ia32_selectq_256(__M, + (__v4di) _mm256_broadcastq_epi64(__A), + (__v4di) _mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 75efcbaa75..1cd0a0ccb1 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -1473,41 +1473,44 @@ __m512i test_mm512_movm_epi16(__mmask32 __A) { return _mm512_movm_epi16(__A); } - __m512i test_mm512_broadcastb_epi8(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.512 - return _mm512_broadcastb_epi8(__A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <64 x i32> zeroinitializer + return _mm512_broadcastb_epi8(__A); } __m512i test_mm512_mask_broadcastb_epi8(__m512i __O, __mmask64 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.512 - return _mm512_mask_broadcastb_epi8(__O, __M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <64 x i32> zeroinitializer + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} + return _mm512_mask_broadcastb_epi8(__O, __M, __A); } __m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.512 - return _mm512_maskz_broadcastb_epi8(__M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> undef, <64 x i32> zeroinitializer + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}} + return _mm512_maskz_broadcastb_epi8(__M, __A); } __m512i test_mm512_broadcastw_epi16(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.512 - return _mm512_broadcastw_epi16(__A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <32 x i32> zeroinitializer + return _mm512_broadcastw_epi16(__A); } __m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.512 - return _mm512_mask_broadcastw_epi16(__O, __M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <32 x i32> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_mask_broadcastw_epi16(__O, __M, __A); } __m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.512 - return _mm512_maskz_broadcastw_epi16(__M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> undef, <32 x i32> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} + return _mm512_maskz_broadcastw_epi16(__M, __A); } __m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) { diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 018ad93f8d..b8121b2f75 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -434,20 +434,6 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b) return _mm512_maskz_alignr_epi64(u, a, b, 2); } -__m512d test_mm512_broadcastsd_pd(__m128d a) -{ - // CHECK-LABEL: @test_mm512_broadcastsd_pd - // CHECK: insertelement <8 x double> {{.*}}, i32 0 - // CHECK: insertelement <8 x double> {{.*}}, i32 1 - // CHECK: insertelement <8 x double> {{.*}}, i32 2 - // CHECK: insertelement <8 x double> {{.*}}, i32 3 - // CHECK: insertelement <8 x double> {{.*}}, i32 4 - // CHECK: insertelement <8 x double> {{.*}}, i32 5 - // CHECK: insertelement <8 x double> {{.*}}, i32 6 - // CHECK: insertelement <8 x double> {{.*}}, i32 7 - return _mm512_broadcastsd_pd(a); -} - __m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) { // CHECK-LABEL: @test_mm512_fmadd_round_pd // CHECK: @llvm.x86.avx512.mask.vfmadd.pd.512 @@ -4388,64 +4374,84 @@ __m512i test_mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { return _mm512_maskz_broadcast_i64x4(__M, __A); } +__m512d test_mm512_broadcastsd_pd(__m128d __A) { + // CHECK-LABEL: @test_mm512_broadcastsd_pd + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <8 x i32> zeroinitializer + return _mm512_broadcastsd_pd(__A); +} + __m512d test_mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A) { // CHECK-LABEL: @test_mm512_mask_broadcastsd_pd - // CHECK: @llvm.x86.avx512.mask.broadcast.sd.pd.512 - return _mm512_mask_broadcastsd_pd(__O, __M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_mask_broadcastsd_pd(__O, __M, __A); } __m512d test_mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastsd_pd - // CHECK: @llvm.x86.avx512.mask.broadcast.sd.pd.512 - return _mm512_maskz_broadcastsd_pd(__M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> undef, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}} + return _mm512_maskz_broadcastsd_pd(__M, __A); +} + +__m512 test_mm512_broadcastss_ps(__m128 __A) { + // CHECK-LABEL: @test_mm512_broadcastss_ps + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <16 x i32> zeroinitializer + return _mm512_broadcastss_ps(__A); } __m512 test_mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_mask_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.512 - return _mm512_mask_broadcastss_ps(__O, __M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_mask_broadcastss_ps(__O, __M, __A); } __m512 test_mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.512 - return _mm512_maskz_broadcastss_ps(__M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> undef, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}} + return _mm512_maskz_broadcastss_ps(__M, __A); } __m512i test_mm512_broadcastd_epi32(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.512 - return _mm512_broadcastd_epi32(__A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <16 x i32> zeroinitializer + return _mm512_broadcastd_epi32(__A); } __m512i test_mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.512 - return _mm512_mask_broadcastd_epi32(__O, __M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_mask_broadcastd_epi32(__O, __M, __A); } __m512i test_mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.512 - return _mm512_maskz_broadcastd_epi32(__M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> undef, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} + return _mm512_maskz_broadcastd_epi32(__M, __A); } __m512i test_mm512_broadcastq_epi64(__m128i __A) { // CHECK-LABEL: @test_mm512_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.512 - return _mm512_broadcastq_epi64(__A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <8 x i32> zeroinitializer + return _mm512_broadcastq_epi64(__A); } __m512i test_mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_mask_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.512 - return _mm512_mask_broadcastq_epi64(__O, __M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_mask_broadcastq_epi64(__O, __M, __A); } __m512i test_mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm512_maskz_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.512 - return _mm512_maskz_broadcastq_epi64(__M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> undef, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} + return _mm512_maskz_broadcastq_epi64(__M, __A); } __m128i test_mm512_cvtsepi32_epi8(__m512i __A) { diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index f27849a15c..c4f70fbddc 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -5353,86 +5353,100 @@ __m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { __m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { // CHECK-LABEL: @test_mm256_mask_broadcastsd_pd - // CHECK: @llvm.x86.avx512.mask.broadcast.sd.pd.256 - return _mm256_mask_broadcastsd_pd(__O, __M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_mask_broadcastsd_pd(__O, __M, __A); } __m256d test_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastsd_pd - // CHECK: @llvm.x86.avx512.mask.broadcast.sd.pd.256 - return _mm256_maskz_broadcastsd_pd(__M, __A); + // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm256_maskz_broadcastsd_pd(__M, __A); } __m128 test_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm_mask_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.128 - return _mm_mask_broadcastss_ps(__O, __M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_mask_broadcastss_ps(__O, __M, __A); } __m128 test_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.128 - return _mm_maskz_broadcastss_ps(__M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm_maskz_broadcastss_ps(__M, __A); } __m256 test_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_mask_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.256 - return _mm256_mask_broadcastss_ps(__O, __M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_mask_broadcastss_ps(__O, __M, __A); } __m256 test_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastss_ps - // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.256 - return _mm256_maskz_broadcastss_ps(__M, __A); + // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} + return _mm256_maskz_broadcastss_ps(__M, __A); } __m128i test_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.128 - return _mm_mask_broadcastd_epi32(__O, __M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + return _mm_mask_broadcastd_epi32(__O, __M, __A); } __m128i test_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.128 - return _mm_maskz_broadcastd_epi32(__M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} + return _mm_maskz_broadcastd_epi32(__M, __A); } __m256i test_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.256 - return _mm256_mask_broadcastd_epi32(__O, __M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_mask_broadcastd_epi32(__O, __M, __A); } __m256i test_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastd_epi32 - // CHECK: @llvm.x86.avx512.pbroadcastd.256 - return _mm256_maskz_broadcastd_epi32(__M, __A); + // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} + return _mm256_maskz_broadcastd_epi32(__M, __A); } __m128i test_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.128 - return _mm_mask_broadcastq_epi64(__O, __M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + return _mm_mask_broadcastq_epi64(__O, __M, __A); } __m128i test_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.128 - return _mm_maskz_broadcastq_epi64(__M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> zeroinitializer + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} + return _mm_maskz_broadcastq_epi64(__M, __A); } __m256i test_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.256 - return _mm256_mask_broadcastq_epi64(__O, __M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_mask_broadcastq_epi64(__O, __M, __A); } __m256i test_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastq_epi64 - // CHECK: @llvm.x86.avx512.pbroadcastq.256 - return _mm256_maskz_broadcastq_epi64(__M, __A); + // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <4 x i32> zeroinitializer + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} + return _mm256_maskz_broadcastq_epi64(__M, __A); } __m128i test_mm_cvtsepi32_epi8(__m128i __A) { diff --git a/test/CodeGen/avx512vlbw-builtins.c b/test/CodeGen/avx512vlbw-builtins.c index 6779b7275d..6bfa09f5d1 100644 --- a/test/CodeGen/avx512vlbw-builtins.c +++ b/test/CodeGen/avx512vlbw-builtins.c @@ -2280,54 +2280,60 @@ __m256i test_mm256_movm_epi16(__mmask16 __A) { return _mm256_movm_epi16(__A); } - - __m128i test_mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.128 - return _mm_mask_broadcastb_epi8(__O, __M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_mask_broadcastb_epi8(__O, __M, __A); } __m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.128 - return _mm_maskz_broadcastb_epi8(__M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}} + return _mm_maskz_broadcastb_epi8(__M, __A); } __m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.256 - return _mm256_mask_broadcastb_epi8(__O, __M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + return _mm256_mask_broadcastb_epi8(__O, __M, __A); } __m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastb_epi8 - // CHECK: @llvm.x86.avx512.pbroadcastb.256 - return _mm256_maskz_broadcastb_epi8(__M, __A); + // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <32 x i32> zeroinitializer + // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}} + return _mm256_maskz_broadcastb_epi8(__M, __A); } __m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_mask_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.128 - return _mm_mask_broadcastw_epi16(__O, __M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_mask_broadcastw_epi16(__O, __M, __A); } __m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.128 - return _mm_maskz_broadcastw_epi16(__M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> zeroinitializer + // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} + return _mm_maskz_broadcastw_epi16(__M, __A); } __m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_mask_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.256 - return _mm256_mask_broadcastw_epi16(__O, __M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_mask_broadcastw_epi16(__O, __M, __A); } __m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) { // CHECK-LABEL: @test_mm256_maskz_broadcastw_epi16 - // CHECK: @llvm.x86.avx512.pbroadcastw.256 - return _mm256_maskz_broadcastw_epi16(__M, __A); + // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <16 x i32> zeroinitializer + // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} + return _mm256_maskz_broadcastw_epi16(__M, __A); } __m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) { -- 2.40.0