From: Simon Pilgrim
Date: Sat, 2 Jul 2016 17:16:25 +0000 (+0000)
Subject: [X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4c6708efade45ae840470bdeedd6c891a31265fa;p=clang

[X86][AVX512] Converted the MOVDDUP/MOVSLDUP/MOVSHDUP masked intrinsics to generic IR

llvm companion patch imminent

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@274442 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index e15462b86a..7263a78756 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1668,9 +1668,6 @@ TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc","","av
 TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movddup512_mask, "V8dV8dV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movddup128_mask, "V2dV2dV2dUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movddup256_mask, "V4dV4dV4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_gpr_mask, "V64ccV64cULLi","","avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_gpr_mask, "V16ccV16cUs","","avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_gpr_mask, "V32ccV32cUi","","avx512bw,avx512vl")
@@ -2122,12 +2119,6 @@ TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movshdup512_mask, "V16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movsldup512_mask, "V16fV16fV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
 TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc","","avx512f")
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index d37414adc2..b5c468a1ca 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -5572,32 +5572,27 @@ _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) {
                                         (__mmask8) __U);
 }
 
-
-
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_movedup_pd (__m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-                                                   (__v8df)
-                                                   _mm512_undefined_pd (),
-                                                   (__mmask8) -1);
+  return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
+                                          0, 0, 2, 2, 4, 4, 6, 6);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-                                                   (__v8df) __W,
-                                                   (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_movedup_pd(__A),
+                                              (__v8df)__W);
 }
 
 static __inline__ __m512d __DEFAULT_FN_ATTRS
 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
 {
-  return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A,
-                                                   (__v8df)
-                                                   _mm512_setzero_pd (),
-                                                   (__mmask8) __U);
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_movedup_pd(__A),
+                                              (__v8df)_mm512_setzero_pd());
 }
 
 #define _mm512_fixupimm_round_pd(A, B, C, imm, R) __extension__ ({ \
@@ -8988,53 +8983,47 @@ _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_movehdup_ps (__m512 __A)
 {
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-                                                   (__v16sf)
-                                                   _mm512_undefined_ps (),
-                                                   (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
+                         1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-                                                   (__v16sf) __W,
-                                                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_movehdup_ps(__A),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A,
-                                                   (__v16sf)
-                                                   _mm512_setzero_ps (),
-                                                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_movehdup_ps(__A),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_moveldup_ps (__m512 __A)
 {
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-                                                   (__v16sf)
-                                                   _mm512_undefined_ps (),
-                                                   (__mmask16) -1);
+  return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
+                         0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-                                                   (__v16sf) __W,
-                                                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_moveldup_ps(__A),
+                                             (__v16sf)__W);
 }
 
 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
 {
-  return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A,
-                                                   (__v16sf)
-                                                   _mm512_setzero_ps (),
-                                                   (__mmask16) __U);
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_moveldup_ps(__A),
+                                             (__v16sf)_mm512_setzero_ps());
 }
 
 #define _mm512_shuffle_epi32(A, I) __extension__ ({ \
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index e97ce7179c..5b875d534c 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -6010,35 +6010,33 @@ _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
 {
-  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
-                                                   (__v2df) __W,
-                                                   (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_movedup_pd(__A),
+                                              (__v2df)__W);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
 {
-  return (__m128d) __builtin_ia32_movddup128_mask ((__v2df) __A,
-                                                   (__v2df)
-                                                   _mm_setzero_pd (),
-                                                   (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_movedup_pd(__A),
+                                              (__v2df)_mm_setzero_pd());
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
 {
-  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
-                                                   (__v4df) __W,
-                                                   (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_movedup_pd(__A),
+                                              (__v4df)__W);
 }
 
 static __inline__ __m256d __DEFAULT_FN_ATTRS
 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
 {
-  return (__m256d) __builtin_ia32_movddup256_mask ((__v4df) __A,
-                                                   (__v4df)
-                                                   _mm256_setzero_pd (),
-                                                   (__mmask8) __U);
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_movedup_pd(__A),
+                                              (__v4df)_mm256_setzero_pd());
 }
 
 
@@ -9025,69 +9023,65 @@ _mm256_permutexvar_epi32 (__m256i __X, __m256i __Y)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
-  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
-                                                   (__v4sf) __W,
-                                                   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_movehdup_ps(__A),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
 {
-  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
-                                                   (__v4sf)
-                                                   _mm_setzero_ps (),
-                                                   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_movehdup_ps(__A),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
-  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
-                                                   (__v8sf) __W,
-                                                   (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_movehdup_ps(__A),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
 {
-  return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A,
-                                                   (__v8sf)
-                                                   _mm256_setzero_ps (),
-                                                   (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_movehdup_ps(__A),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
-  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
-                                                   (__v4sf) __W,
-                                                   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_moveldup_ps(__A),
+                                             (__v4sf)__W);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
 {
-  return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A,
-                                                   (__v4sf)
-                                                   _mm_setzero_ps (),
-                                                   (__mmask8) __U);
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_moveldup_ps(__A),
+                                             (__v4sf)_mm_setzero_ps());
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
-  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
-                                                   (__v8sf) __W,
-                                                   (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_moveldup_ps(__A),
+                                             (__v8sf)__W);
 }
 
 static __inline__ __m256 __DEFAULT_FN_ATTRS
 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
 {
-  return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A,
-                                                   (__v8sf)
-                                                   _mm256_setzero_ps (),
-                                                   (__mmask8) __U);
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_moveldup_ps(__A),
+                                             (__v8sf)_mm256_setzero_ps());
 }
 
 #define _mm256_mask_shuffle_epi32(W, U, A, I) __extension__({\
diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c
index 1f615ba561..d638b03dee 100644
--- a/test/CodeGen/avx512f-builtins.c
+++ b/test/CodeGen/avx512f-builtins.c
@@ -2680,20 +2680,22 @@ void test_mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A) {
 
 __m512d test_mm512_movedup_pd(__m512d __A) {
   // CHECK-LABEL: @test_mm512_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.512
-  return _mm512_movedup_pd(__A);
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  return _mm512_movedup_pd(__A);
 }
 
 __m512d test_mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_mask_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.512
-  return _mm512_mask_movedup_pd(__W, __U, __A);
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_mask_movedup_pd(__W, __U, __A);
 }
 
 __m512d test_mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A) {
   // CHECK-LABEL: @test_mm512_maskz_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.512
-  return _mm512_maskz_movedup_pd(__U, __A);
+  // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
+  return _mm512_maskz_movedup_pd(__U, __A);
 }
 
 int test_mm_comi_round_sd(__m128d __A, __m128d __B) {
@@ -5974,38 +5976,42 @@ __mmask8 test_mm_mask_cmp_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) {
 
 __m512 test_mm512_movehdup_ps(__m512 __A) {
   // CHECK-LABEL: @test_mm512_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.512
-  return _mm512_movehdup_ps(__A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  return _mm512_movehdup_ps(__A);
 }
 
 __m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_mask_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.512
-  return _mm512_mask_movehdup_ps(__W, __U, __A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_mask_movehdup_ps(__W, __U, __A);
 }
 
 __m512 test_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_maskz_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.512
-  return _mm512_maskz_movehdup_ps(__U, __A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_maskz_movehdup_ps(__U, __A);
 }
 
 __m512 test_mm512_moveldup_ps(__m512 __A) {
   // CHECK-LABEL: @test_mm512_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.512
-  return _mm512_moveldup_ps(__A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  return _mm512_moveldup_ps(__A);
 }
 
 __m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_mask_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.512
-  return _mm512_mask_moveldup_ps(__W, __U, __A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_mask_moveldup_ps(__W, __U, __A);
 }
 
 __m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
   // CHECK-LABEL: @test_mm512_maskz_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.512
-  return _mm512_maskz_moveldup_ps(__U, __A);
+  // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_maskz_moveldup_ps(__U, __A);
 }
 
 __m512i test_mm512_shuffle_epi32(__m512i __A) {
diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c
index 33fcae285e..750c60c123 100644
--- a/test/CodeGen/avx512vl-builtins.c
+++ b/test/CodeGen/avx512vl-builtins.c
@@ -4066,25 +4066,29 @@ void test_mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A) {
 
 __m128d test_mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A) {
   // CHECK-LABEL: @test_mm_mask_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_mask_movedup_pd(__W, __U, __A);
 }
 
 __m128d test_mm_maskz_movedup_pd(__mmask8 __U, __m128d __A) {
   // CHECK-LABEL: @test_mm_maskz_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.128
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return _mm_maskz_movedup_pd(__U, __A);
 }
 
 __m256d test_mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A) {
-  // CHECK-LABEL: @test_mm256_mask_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.256
+  // CHECK-LABEL: @test_mm256_mask_movedup_pd
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_mask_movedup_pd(__W, __U, __A);
 }
 
 __m256d test_mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A) {
-  // CHECK-LABEL: @test_mm256_maskz_movedup_pd
-  // CHECK: @llvm.x86.avx512.mask.movddup.256
+  // CHECK-LABEL: @test_mm256_maskz_movedup_pd
+  // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return _mm256_maskz_movedup_pd(__U, __A);
 }
 
@@ -6559,49 +6563,57 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
 
 __m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_movehdup_ps(__W, __U, __A);
 }
 
 __m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_movehdup_ps(__U, __A);
 }
 
 __m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_movehdup_ps(__W, __U, __A);
 }
 
 __m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_movehdup_ps
-  // CHECK: @llvm.x86.avx512.mask.movshdup.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_movehdup_ps(__U, __A);
 }
 
 __m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_mask_moveldup_ps(__W, __U, __A);
 }
 
 __m128 test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.128
+  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  // CHECK: select <4 x i1> %{{.*}} <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return _mm_maskz_moveldup_ps(__U, __A);
 }
 
 __m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_mask_moveldup_ps(__W, __U, __A);
 }
 
 __m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_moveldup_ps
-  // CHECK: @llvm.x86.avx512.mask.movsldup.256
+  // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  // CHECK: select <8 x i1> %{{.*}} <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm256_maskz_moveldup_ps(__U, __A);
 }
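
Illustrative note (not part of the commit): a minimal sketch of what the converted intrinsics are expected to emit after this change. The file name and function names below are hypothetical; the IR in the comments is approximate and mirrors the CHECK patterns added to the tests above. Per the commit message, an imminent LLVM companion patch is expected to match the shuffle/select pattern back to the masked MOVDDUP/MOVSHDUP/MOVSLDUP instructions.

/* movedup_demo.c -- hypothetical example; build with e.g.
     clang -mavx512f -emit-llvm -S movedup_demo.c               */
#include <immintrin.h>

__m512d demo_mask_movedup(__m512d w, __mmask8 u, __m512d a) {
  /* With this patch the header expands to a generic shuffle plus a mask
     select, roughly:
       %dup = shufflevector <8 x double> %a, <8 x double> %a,
                <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
       %res = select <8 x i1> %u, <8 x double> %dup, <8 x double> %w
     instead of a call to @llvm.x86.avx512.mask.movddup.512.                */
  return _mm512_mask_movedup_pd(w, u, a);
}

__m512 demo_maskz_movehdup(__mmask16 u, __m512 a) {
  /* Zero-masking form: same shuffle, with the select falling back to
     zeroinitializer rather than a pass-through operand.                    */
  return _mm512_maskz_movehdup_ps(u, a);
}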