From: Craig Topper
Date: Wed, 31 Aug 2016 05:38:58 +0000 (+0000)
Subject: [AVX-512] Implement masked floating point logical operations with native IR
 and remove the builtins.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=616c97af74d59b5c83610ffb19168eb7413d4616;p=clang

[AVX-512] Implement masked floating point logical operations with native IR and
remove the builtins.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@280197 91177308-0d34-0410-b5e6-96231b3b80d8
---
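Note: the masked forms now lower to a plain bitwise operation on the
integer-cast vector followed by a generic select builtin, so the backend
sees ordinary IR rather than an opaque target intrinsic. As a minimal
usage sketch (the demo function is illustrative, not part of this patch;
compile with e.g. -mavx512dq):

#include <immintrin.h>

/* Lanes whose bit is set in __U receive __A ^ __B; the remaining
   lanes keep the passthrough value __W. */
__m512d mask_xor_demo(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  return _mm512_mask_xor_pd(__W, __U, __A, __B);
  /* Per the updated CHECK lines below, clang now emits:
   *   %x = xor <8 x i64> %a, %b
   *   %m = bitcast i8 %u to <8 x i1>
   *   %r = select <8 x i1> %m, <8 x double> %x.pd, <8 x double> %w
   */
}
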
diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def
index 0accba4aba..2d616e0ea5 100644
--- a/include/clang/Basic/BuiltinsX86.def
+++ b/include/clang/Basic/BuiltinsX86.def
@@ -1131,32 +1131,8 @@ TARGET_BUILTIN(__builtin_ia32_psubd512_mask, "V16iV16iV16iV16iUs", "", "avx512f"

 TARGET_BUILTIN(__builtin_ia32_pmulld512_mask, "V16iV16iV16iV16iUs", "", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmullq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd512_mask, "V8dV8dV8dV8dUc", "", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps512_mask, "V16fV16fV16fV16fUs", "", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_pmullq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_pmullq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andnps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_andps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_xorps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd256_mask, "V4dV4dV4dV4dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orpd128_mask, "V2dV2dV2dV2dUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps256_mask, "V8fV8fV8fV8fUc", "", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_orps128_mask, "V4fV4fV4fV4fUc", "", "avx512vl,avx512dq")

 TARGET_BUILTIN(__builtin_ia32_pabsb512_mask, "V64cV64cV64cULLi", "", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pabsw512_mask, "V32sV32sV32sUi", "", "avx512bw")
diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h
index b60bec12bc..2189e9066d 100644
--- a/lib/Headers/avx512dqintrin.h
+++ b/lib/Headers/avx512dqintrin.h
@@ -54,179 +54,155 @@ _mm512_maskz_mullo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) {
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_xor_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A ^ (__v8du) __B);
+_mm512_xor_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A ^ (__v8du)__B);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_xor_pd(__A, __B),
+                                              (__v8df)__W);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_xorpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_xor_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
 _mm512_xor_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A ^ (__v16su) __B);
+  return (__m512)((__v16su)__A ^ (__v16su)__B);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf) __W,
-             (__mmask16) __U);
+_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_xor_ps(__A, __B),
+                                             (__v16sf)__W);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_xorps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf)
-             _mm512_setzero_ps (),
-             (__mmask16) __U);
+_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_xor_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_or_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A | (__v8du) __B);
+_mm512_or_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A | (__v8du)__B);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_or_pd(__A, __B),
+                                              (__v8df)__W);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_orpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_or_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_or_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A | (__v16su) __B);
+_mm512_or_ps(__m512 __A, __m512 __B) {
+  return (__m512)((__v16su)__A | (__v16su)__B);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf) __W,
-             (__mmask16) __U);
+_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_or_ps(__A, __B),
+                                             (__v16sf)__W);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_orps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf)
-             _mm512_setzero_ps (),
-             (__mmask16) __U);
+_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_or_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_and_pd (__m512d __A, __m512d __B) {
-  return (__m512d) ((__v8du) __A & (__v8du) __B);
+_mm512_and_pd(__m512d __A, __m512d __B) {
+  return (__m512d)((__v8du)__A & (__v8du)__B);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_and_pd(__A, __B),
+                                              (__v8df)__W);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_and_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_and_ps (__m512 __A, __m512 __B) {
-  return (__m512) ((__v16su) __A & (__v16su) __B);
+_mm512_and_ps(__m512 __A, __m512 __B) {
+  return (__m512)((__v16su)__A & (__v16su)__B);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf) __W,
-             (__mmask16) __U);
+_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_and_ps(__A, __B),
+                                             (__v16sf)__W);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf)
-             _mm512_setzero_ps (),
-             (__mmask16) __U);
+_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_and_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_andnot_pd (__m512d __A, __m512d __B) {
+_mm512_andnot_pd(__m512d __A, __m512d __B) {
   return (__m512d)(~(__v8du)__A & (__v8du)__B);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df) __W,
-             (__mmask8) __U);
+_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_andnot_pd(__A, __B),
+                                              (__v8df)__W);
 }

 static __inline__ __m512d __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
-  return (__m512d) __builtin_ia32_andnpd512_mask ((__v8df) __A,
-             (__v8df) __B,
-             (__v8df)
-             _mm512_setzero_pd (),
-             (__mmask8) __U);
+_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
+  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
+                                              (__v8df)_mm512_andnot_pd(__A, __B),
+                                              (__v8df)_mm512_setzero_pd());
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_andnot_ps (__m512 __A, __m512 __B) {
+_mm512_andnot_ps(__m512 __A, __m512 __B) {
   return (__m512)(~(__v16su)__A & (__v16su)__B);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf) __W,
-             (__mmask16) __U);
+_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_andnot_ps(__A, __B),
+                                             (__v16sf)__W);
 }

 static __inline__ __m512 __DEFAULT_FN_ATTRS
-_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
-  return (__m512) __builtin_ia32_andnps512_mask ((__v16sf) __A,
-             (__v16sf) __B,
-             (__v16sf)
-             _mm512_setzero_ps (),
-             (__mmask16) __U);
+_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
+  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
+                                             (__v16sf)_mm512_andnot_ps(__A, __B),
+                                             (__v16sf)_mm512_setzero_ps());
 }

 static __inline__ __m512i __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
index 8187bcd6b2..66aa6420c0 100644
--- a/lib/Headers/avx512vldqintrin.h
+++ b/lib/Headers/avx512vldqintrin.h
@@ -76,276 +76,227 @@ _mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_andnot_pd(__A, __B),
+                                              (__v4df)__W);
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_andnot_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_andnot_pd(__A, __B),
+                                              (__v2df)__W);
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_andnot_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf) __W,
-             (__mmask8) __U);
+_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_andnot_ps(__A, __B),
+                                             (__v8sf)__W);
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf)
-             _mm256_setzero_ps (),
-             (__mmask8) __U);
+_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_andnot_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf) __W,
-             (__mmask8) __U);
+_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_andnot_ps(__A, __B),
+                                             (__v4sf)__W);
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) __U);
+_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_andnot_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_and_pd(__A, __B),
+                                              (__v4df)__W);
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_and_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_and_pd(__A, __B),
+                                              (__v2df)__W);
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_and_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf) __W,
-             (__mmask8) __U);
+_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_and_ps(__A, __B),
+                                             (__v8sf)__W);
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf)
-             _mm256_setzero_ps (),
-             (__mmask8) __U);
+_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_and_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf) __W,
-             (__mmask8) __U);
+_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_and_ps(__A, __B),
+                                             (__v4sf)__W);
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) __U);
+_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_and_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
-        __m256d __B) {
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_xor_pd(__A, __B),
+                                              (__v4df)__W);
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_xor_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_xor_pd(__A, __B),
+                                              (__v2df)__W);
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_xor_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf) __W,
-             (__mmask8) __U);
+_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_xor_ps(__A, __B),
+                                             (__v8sf)__W);
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf)
-             _mm256_setzero_ps (),
-             (__mmask8) __U);
+_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_xor_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf) __W,
-             (__mmask8) __U);
+_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_xor_ps(__A, __B),
+                                             (__v4sf)__W);
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) __U);
+_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_xor_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df) __W,
-             (__mmask8) __U);
+_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_or_pd(__A, __B),
+                                              (__v4df)__W);
 }

 static __inline__ __m256d __DEFAULT_FN_ATTRS
-_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
-  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
-             (__v4df) __B,
-             (__v4df)
-             _mm256_setzero_pd (),
-             (__mmask8) __U);
+_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
+  return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
+                                              (__v4df)_mm256_or_pd(__A, __B),
+                                              (__v4df)_mm256_setzero_pd());
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df) __W,
-             (__mmask8) __U);
+_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_or_pd(__A, __B),
+                                              (__v2df)__W);
 }

 static __inline__ __m128d __DEFAULT_FN_ATTRS
-_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
-  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
-             (__v2df) __B,
-             (__v2df)
-             _mm_setzero_pd (),
-             (__mmask8) __U);
+_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
+  return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
+                                              (__v2df)_mm_or_pd(__A, __B),
+                                              (__v2df)_mm_setzero_pd());
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf) __W,
-             (__mmask8) __U);
+_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_or_ps(__A, __B),
+                                             (__v8sf)__W);
 }

 static __inline__ __m256 __DEFAULT_FN_ATTRS
-_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
-  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
-             (__v8sf) __B,
-             (__v8sf)
-             _mm256_setzero_ps (),
-             (__mmask8) __U);
+_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
+  return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+                                             (__v8sf)_mm256_or_ps(__A, __B),
+                                             (__v8sf)_mm256_setzero_ps());
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf) __W,
-             (__mmask8) __U);
+_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_or_ps(__A, __B),
+                                             (__v4sf)__W);
 }

 static __inline__ __m128 __DEFAULT_FN_ATTRS
-_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
-  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
-             (__v4sf) __B,
-             (__v4sf)
-             _mm_setzero_ps (),
-             (__mmask8) __U);
+_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
+                                             (__v4sf)_mm_or_ps(__A, __B),
+                                             (__v4sf)_mm_setzero_ps());
 }

 static __inline__ __m128i __DEFAULT_FN_ATTRS
diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c
index 91bfbaf52b..ac6e3420d6 100644
--- a/test/CodeGen/avx512dq-builtins.c
+++ b/test/CodeGen/avx512dq-builtins.c
@@ -31,13 +31,17 @@ __m512d test_mm512_xor_pd (__m512d __A, __m512d __B) {

 __m512d test_mm512_mask_xor_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+  // CHECK: xor <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_xor_pd(__W, __U, __A, __B);
 }

 __m512d test_mm512_maskz_xor_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.512
+  // CHECK: xor <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_xor_pd(__U, __A, __B);
 }

@@ -49,13 +53,17 @@ __m512 test_mm512_xor_ps (__m512 __A, __m512 __B) {

 __m512 test_mm512_mask_xor_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+  // CHECK: xor <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_xor_ps(__W, __U, __A, __B);
 }

 __m512 test_mm512_maskz_xor_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.512
+  // CHECK: xor <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_xor_ps(__U, __A, __B);
 }

@@ -67,13 +75,17 @@ __m512d test_mm512_or_pd (__m512d __A, __m512d __B) {

 __m512d test_mm512_mask_or_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.512
+  // CHECK: or <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_or_pd(__W, __U, __A, __B);
 }

 __m512d test_mm512_maskz_or_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.512
+  // CHECK: or <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_or_pd(__U, __A, __B);
 }

@@ -85,13 +97,17 @@ __m512 test_mm512_or_ps (__m512 __A, __m512 __B) {

 __m512 test_mm512_mask_or_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.512
+  // CHECK: or <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_or_ps(__W, __U, __A, __B);
 }

 __m512 test_mm512_maskz_or_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.512
+  // CHECK: or <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_or_ps(__U, __A, __B);
 }

@@ -103,13 +119,17 @@ __m512d test_mm512_and_pd (__m512d __A, __m512d __B) {

 __m512d test_mm512_mask_and_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.512
+  // CHECK: and <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_and_pd(__W, __U, __A, __B);
 }

 __m512d test_mm512_maskz_and_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.512
+  // CHECK: and <8 x i64>
+  // CHECK: %[[MASK:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // CHECK: select <8 x i1> %[[MASK]], <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_and_pd(__U, __A, __B);
 }

@@ -121,13 +141,17 @@ __m512 test_mm512_and_ps (__m512 __A, __m512 __B) {

 __m512 test_mm512_mask_and_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.512
+  // CHECK: and <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_and_ps(__W, __U, __A, __B);
 }

 __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.512
+  // CHECK: and <16 x i32>
+  // CHECK: %[[MASK:.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // CHECK: select <16 x i1> %[[MASK]], <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_and_ps(__U, __A, __B);
 }

@@ -140,13 +164,17 @@ __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {

 __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+  // CHECK: xor <8 x i64> %{{.*}},
+  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B);
 }

 __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
   // CHECK-LABEL: @test_mm512_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.512
+  // CHECK: xor <8 x i64> %{{.*}},
+  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B);
 }

@@ -159,13 +187,17 @@ __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) {

 __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+  // CHECK: xor <16 x i32> %{{.*}},
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B);
 }

 __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
   // CHECK-LABEL: @test_mm512_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.512
+  // CHECK: xor <16 x i32> %{{.*}},
+  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B);
 }

diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c
index 68d793c923..75af588c91 100644
--- a/test/CodeGen/avx512vldq-builtins.c
+++ b/test/CodeGen/avx512vldq-builtins.c
@@ -43,193 +43,233 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {

 __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+  // CHECK: xor <4 x i64> %{{.*}},
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B);
 }

 __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.256
+  // CHECK: xor <4 x i64> %{{.*}},
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B);
 }

 __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+  // CHECK: xor <2 x i64> %{{.*}},
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B);
 }

 __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_andnot_pd
-  // CHECK: @llvm.x86.avx512.mask.andn.pd.128
+  // CHECK: xor <2 x i64> %{{.*}},
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B);
 }

 __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+  // CHECK: xor <8 x i32> %{{.*}},
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B);
 }

 __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.256
+  // CHECK: xor <8 x i32> %{{.*}},
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B);
 }

 __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+  // CHECK: xor <4 x i32> %{{.*}},
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B);
 }

 __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_andnot_ps
-  // CHECK: @llvm.x86.avx512.mask.andn.ps.128
+  // CHECK: xor <4 x i32> %{{.*}},
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_andnot_ps (__U, __A, __B);
 }

 __m256d test_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_and_pd ( __W, __U, __A, __B);
 }

 __m256d test_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.256
+  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_and_pd (__U, __A, __B);
 }

 __m128d test_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_and_pd ( __W, __U, __A, __B);
 }

 __m128d test_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_and_pd
-  // CHECK: @llvm.x86.avx512.mask.and.pd.128
+  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_and_pd (__U, __A, __B);
 }

 __m256 test_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.256
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_and_ps ( __W, __U, __A, __B);
 }

 __m256 test_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.256
+  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_and_ps (__U, __A, __B);
 }

 __m128 test_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.128
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_and_ps ( __W, __U, __A, __B);
 }

 __m128 test_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_and_ps
-  // CHECK: @llvm.x86.avx512.mask.and.ps.128
+  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_and_ps (__U, __A, __B);
 }

 __m256d test_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_xor_pd ( __W, __U, __A, __B);
 }

 __m256d test_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.256
+  // CHECK: xor <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_xor_pd (__U, __A, __B);
 }

 __m128d test_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_xor_pd ( __W, __U, __A, __B);
 }

 __m128d test_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_xor_pd
-  // CHECK: @llvm.x86.avx512.mask.xor.pd.128
+  // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_xor_pd (__U, __A, __B);
 }

 __m256 test_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_xor_ps ( __W, __U, __A, __B);
 }

 __m256 test_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.256
+  // CHECK: xor <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_xor_ps (__U, __A, __B);
 }

 __m128 test_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.128
-  return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
+  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+  return (__m128) _mm_mask_xor_ps ( __W, __U, __A, __B);
 }

 __m128 test_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_xor_ps
-  // CHECK: @llvm.x86.avx512.mask.xor.ps.128
+  // CHECK: xor <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_xor_ps (__U, __A, __B);
 }

 __m256d test_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.256
+  // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_mask_or_pd ( __W, __U, __A, __B);
 }

 __m256d test_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   // CHECK-LABEL: @test_mm256_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.256
+  // CHECK: or <4 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
   return (__m256d) _mm256_maskz_or_pd (__U, __A, __B);
 }

 __m128d test_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_mask_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.128
+  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_mask_or_pd ( __W, __U, __A, __B);
 }

 __m128d test_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   // CHECK-LABEL: @test_mm_maskz_or_pd
-  // CHECK: @llvm.x86.avx512.mask.or.pd.128
+  // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
+  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
   return (__m128d) _mm_maskz_or_pd (__U, __A, __B);
 }

 __m256 test_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.256
+  // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_mask_or_ps ( __W, __U, __A, __B);
 }

 __m256 test_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   // CHECK-LABEL: @test_mm256_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.256
+  // CHECK: or <8 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return (__m256) _mm256_maskz_or_ps (__U, __A, __B);
 }

 __m128 test_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_mask_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.128
+  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_mask_or_ps ( __W, __U, __A, __B);
 }

 __m128 test_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   // CHECK-LABEL: @test_mm_maskz_or_ps
-  // CHECK: @llvm.x86.avx512.mask.or.ps.128
+  // CHECK: or <4 x i32> %{{.*}}, %{{.*}}
+  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
   return (__m128) _mm_maskz_or_ps(__U, __A, __B);
 }