From: Craig Topper
Date: Thu, 31 May 2018 01:24:40 +0000 (+0000)
Subject: [X86] Fix some places where macro arguments to intrinsics weren't cast to _m512(i...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=37a5f47adb9fefe8277b14e7d74c8a403c7176fb;p=clang

[X86] Fix some places where macro arguments to intrinsics weren't cast to
_m512(i|d)/_m256(i|d)/_m128(i|d) first.

The majority of the cases were correct. This fixes the few that weren't.

I also removed some superfluous parentheses in non-macros that confused my
attempts at grepping for missing casts.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@333615 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Headers/__wmmintrin_pclmul.h b/lib/Headers/__wmmintrin_pclmul.h
index 8f323c0636..e0f928796a 100644
--- a/lib/Headers/__wmmintrin_pclmul.h
+++ b/lib/Headers/__wmmintrin_pclmul.h
@@ -55,8 +55,8 @@
 ///    Bit[4]=1 indicates that bits[127:64] of operand \a __Y are used.
 /// \returns The 128-bit integer vector containing the result of the carry-less
 ///    multiplication of the selected 64-bit values.
-#define _mm_clmulepi64_si128(__X, __Y, __I) \
-  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(__X), \
-                                        (__v2di)(__m128i)(__Y), (char)(__I)))
+#define _mm_clmulepi64_si128(X, Y, I) \
+  ((__m128i)__builtin_ia32_pclmulqdq128((__v2di)(__m128i)(X), \
+                                        (__v2di)(__m128i)(Y), (char)(I)))
 
 #endif /* __WMMINTRIN_PCLMUL_H */
diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h
index d7563af949..6695a0bb5b 100644
--- a/lib/Headers/avx512dqintrin.h
+++ b/lib/Headers/avx512dqintrin.h
@@ -1119,7 +1119,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
-                                      (__v8sf)(W))
+                                      (__v8sf)(__m256)(W))
 
 #define _mm512_maskz_extractf32x8_ps(U, A, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -1135,7 +1135,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
-                                       (__v2df)(W))
+                                       (__v2df)(__m128d)(W))
 
 #define _mm512_maskz_extractf64x2_pd(U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1157,7 +1157,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
-                                      (__v8si)(W))
+                                      (__v8si)(__m256i)(W))
 
 #define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -1173,7 +1173,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
-                                      (__v2di)(W))
+                                      (__v2di)(__m128i)(W))
 
 #define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
   (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1203,7 +1203,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_insertf32x8(W, U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
-                                      (__v16sf)(W))
+                                      (__v16sf)(__m512)(W))
 
 #define _mm512_maskz_insertf32x8(U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -1225,7 +1225,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_insertf64x2(W, U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
-                                       (__v8df)(W))
+                                       (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_insertf64x2(U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -1255,7 +1255,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_inserti32x8(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
-                                      (__v16si)(W))
+                                      (__v16si)(__m512i)(W))
 
 #define _mm512_maskz_inserti32x8(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
@@ -1277,7 +1277,7 @@ _mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
 #define _mm512_mask_inserti64x2(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
-                                      (__v8di)(W))
+                                      (__v8di)(__m512i)(W))
 
 #define _mm512_maskz_inserti64x2(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h
index f6795a6bfc..317cf215d6 100644
--- a/lib/Headers/avx512fintrin.h
+++ b/lib/Headers/avx512fintrin.h
@@ -679,13 +679,13 @@ _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_si512 (__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
+  return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v16su)(__A) & (__v16su)__B);
+  return (__m512i)(~(__v16su)__A & (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -706,7 +706,7 @@ _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_andnot_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)(~(__v8du)(__A) & (__v8du)__B);
+  return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
@@ -3622,7 +3622,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
 #define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
-                                       (__v4df)(W))
+                                       (__v4df)(__m256d)(W))
 
 #define _mm512_maskz_extractf64x4_pd(U, A, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
@@ -3640,7 +3640,7 @@ _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
 #define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
-                                      (__v4sf)(W))
+                                      (__v4sf)(__m128)(W))
 
 #define _mm512_maskz_extractf32x4_ps(U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
@@ -4337,7 +4337,7 @@ _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
   (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
-                                            (__v8si)(W), \
+                                            (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R))
 
 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
@@ -7736,7 +7736,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
-                                      (__v4si)(W))
+                                      (__v4si)(__m128i)(W))
 
 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -7754,7 +7754,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
-                                      (__v4di)(W))
+                                      (__v4di)(__m256i)(W))
 
 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
@@ -7776,7 +7776,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                        (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
-                                       (__v8df)(W))
+                                       (__v8df)(__m512d)(W))
 
 #define _mm512_maskz_insertf64x4(U, A, B, imm) \
   (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
@@ -7798,7 +7798,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                       (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
-                                      (__v8di)(W))
+                                      (__v8di)(__m512i)(W))
 
 #define _mm512_maskz_inserti64x4(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
@@ -7828,7 +7828,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                       (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
-                                      (__v16sf)(W))
+                                      (__v16sf)(__m512)(W))
 
 #define _mm512_maskz_insertf32x4(U, A, B, imm) \
   (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
@@ -7858,7 +7858,7 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                       (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
-                                      (__v16si)(W))
+                                      (__v16si)(__m512i)(W))
 
 #define _mm512_maskz_inserti32x4(U, A, B, imm) \
   (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
@@ -9386,19 +9386,19 @@ _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
-                                             (__v2df)(__B),
-                                             (__v4sf)(__W),
-                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+                                             (__v2df)__B,
+                                             (__v4sf)__W,
+                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m128 __DEFAULT_FN_ATTRS
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
-  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)(__A),
-                                             (__v2df)(__B),
+  return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
+                                             (__v2df)__B,
                                              (__v4sf)_mm_setzero_ps(),
-                                             (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
 #define _mm_cvtss_i32 _mm_cvtss_si32
@@ -9459,19 +9459,19 @@ _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
-                                            (__v4sf)(__B),
-                                            (__v2df)(__W),
-                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+                                            (__v4sf)__B,
+                                            (__v2df)__W,
+                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
 {
-  return __builtin_ia32_cvtss2sd_round_mask((__v2df)(__A),
-                                            (__v4sf)(__B),
-                                            (__v2df)_mm_setzero_pd(),
-                                            (__mmask8)(__U), _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
+                                            (__v4sf)__B,
+                                            (__v2df)_mm_setzero_pd(),
+                                            (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
 static __inline__ __m128d __DEFAULT_FN_ATTRS
diff --git a/lib/Headers/avx512vbmi2intrin.h b/lib/Headers/avx512vbmi2intrin.h
index a7d957edec..c19350ebfa 100644
--- a/lib/Headers/avx512vbmi2intrin.h
+++ b/lib/Headers/avx512vbmi2intrin.h
@@ -143,10 +143,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
 }
 
 #define _mm512_mask_shldi_epi64(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
-                                          (__v8di)(B), \
+  (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(__m512i)(A), \
+                                          (__v8di)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v8di)(S), \
+                                          (__v8di)(__m512i)(S), \
                                           (__mmask8)(U))
 
 #define _mm512_maskz_shldi_epi64(U, A, B, I) \
@@ -156,10 +156,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm512_mask_shldi_epi32(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
-                                          (__v16si)(B), \
+  (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(__m512i)(A), \
+                                          (__v16si)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v16si)(S), \
+                                          (__v16si)(__m512i)(S), \
                                           (__mmask16)(U))
 
 #define _mm512_maskz_shldi_epi32(U, A, B, I) \
@@ -169,10 +169,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
 
 #define _mm512_mask_shldi_epi16(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
-                                          (__v32hi)(B), \
+  (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(__m512i)(A), \
+                                          (__v32hi)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v32hi)(S), \
+                                          (__v32hi)(__m512i)(S), \
                                           (__mmask32)(U))
 
 #define _mm512_maskz_shldi_epi16(U, A, B, I) \
@@ -182,10 +182,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I))
 
 #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
-                                          (__v8di)(B), \
+  (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(__m512i)(A), \
+                                          (__v8di)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v8di)(S), \
+                                          (__v8di)(__m512i)(S), \
                                           (__mmask8)(U))
 
 #define _mm512_maskz_shrdi_epi64(U, A, B, I) \
@@ -195,10 +195,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
-                                          (__v16si)(B), \
+  (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(__m512i)(A), \
+                                          (__v16si)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v16si)(S), \
+                                          (__v16si)(__m512i)(S), \
                                           (__mmask16)(U))
 
 #define _mm512_maskz_shrdi_epi32(U, A, B, I) \
@@ -208,10 +208,10 @@ _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
   _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I))
 
 #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
-                                          (__v32hi)(B), \
+  (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(__m512i)(A), \
+                                          (__v32hi)(__m512i)(B), \
                                           (int)(I), \
-                                          (__v32hi)(S), \
+                                          (__v32hi)(__m512i)(S), \
                                           (__mmask32)(U))
 
 #define _mm512_maskz_shrdi_epi16(U, A, B, I) \
diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h
index 53beef2abd..96836527da 100644
--- a/lib/Headers/avx512vldqintrin.h
+++ b/lib/Headers/avx512vldqintrin.h
@@ -1094,7 +1094,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
-                                       (__v2df)(W))
+                                       (__v2df)(__m128d)(W))
 
 #define _mm256_maskz_extractf64x2_pd(U, A, imm) \
   (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
@@ -1110,7 +1110,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                       (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
-                                      (__v2di)(W))
+                                      (__v2di)(__m128i)(W))
 
 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
   (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -1118,7 +1118,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
                                       (__v2di)_mm_setzero_si128())
 
 #define _mm256_insertf64x2(A, B, imm) \
-  (__m256d)__builtin_shufflevector((__v4df)(A), \
+  (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
                                    (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
                                    ((imm) & 0x1) ? 0 : 4, \
                                    ((imm) & 0x1) ? 1 : 5, \
@@ -1128,7 +1128,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_insertf64x2(W, U, A, B, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
-                                       (__v4df)(W))
+                                       (__v4df)(__m256d)(W))
 
 #define _mm256_maskz_insertf64x2(U, A, B, imm) \
   (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
@@ -1136,7 +1136,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
                                        (__v4df)_mm256_setzero_pd())
 
 #define _mm256_inserti64x2(A, B, imm) \
-  (__m256i)__builtin_shufflevector((__v4di)(A), \
+  (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
                                    (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
                                    ((imm) & 0x1) ? 0 : 4, \
                                    ((imm) & 0x1) ? 1 : 5, \
@@ -1146,7 +1146,7 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 #define _mm256_mask_inserti64x2(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
-                                      (__v4di)(W))
+                                      (__v4di)(__m256i)(W))
 
 #define _mm256_maskz_inserti64x2(U, A, B, imm) \
   (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h
index cc091e6c36..2b9f14b99f 100644
--- a/lib/Headers/avx512vlintrin.h
+++ b/lib/Headers/avx512vlintrin.h
@@ -7729,7 +7729,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
-                                      (__v4sf)(W))
+                                      (__v4sf)(__m128)(W))
 
 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
   (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
@@ -7747,7 +7747,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
-                                      (__v4si)(W))
+                                      (__v4si)(__m128i)(W))
 
 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
   (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -7755,7 +7755,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                       (__v4si)_mm_setzero_si128())
 
 #define _mm256_insertf32x4(A, B, imm) \
-  (__m256)__builtin_shufflevector((__v8sf)(A), \
+  (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
                                   (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
                                   ((imm) & 0x1) ? 0 : 8, \
                                   ((imm) & 0x1) ? 1 : 9, \
@@ -7769,7 +7769,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
-                                      (__v8sf)(W))
+                                      (__v8sf)(__m256)(W))
 
 #define _mm256_maskz_insertf32x4(U, A, B, imm) \
   (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
@@ -7777,7 +7777,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
                                       (__v8sf)_mm256_setzero_ps())
 
 #define _mm256_inserti32x4(A, B, imm) \
-  (__m256i)__builtin_shufflevector((__v8si)(A), \
+  (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
                                    (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
                                    ((imm) & 0x1) ? 0 : 8, \
                                    ((imm) & 0x1) ? 1 : 9, \
@@ -7791,7 +7791,7 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
-                                      (__v8si)(W))
+                                      (__v8si)(__m256i)(W))
 
 #define _mm256_maskz_inserti32x4(U, A, B, imm) \
   (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
diff --git a/lib/Headers/avx512vlvbmi2intrin.h b/lib/Headers/avx512vlvbmi2intrin.h
index 06dbb2ddc1..94fac5117f 100644
--- a/lib/Headers/avx512vlvbmi2intrin.h
+++ b/lib/Headers/avx512vlvbmi2intrin.h
@@ -252,10 +252,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
 }
 
 #define _mm256_mask_shldi_epi64(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \
-                                          (__v4di)(B), \
+  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(__m256i)(A), \
+                                          (__v4di)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v4di)(S), \
+                                          (__v4di)(__m256i)(S), \
                                           (__mmask8)(U))
 
 #define _mm256_maskz_shldi_epi64(U, A, B, I) \
@@ -265,10 +265,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shldi_epi64(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \
-                                          (__v2di)(B), \
+  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(__m128i)(A), \
+                                          (__v2di)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v2di)(S), \
+                                          (__v2di)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shldi_epi64(U, A, B, I) \
@@ -278,10 +278,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm256_mask_shldi_epi32(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \
-                                          (__v8si)(B), \
+  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(__m256i)(A), \
+                                          (__v8si)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v8si)(S), \
+                                          (__v8si)(__m256i)(S), \
                                           (__mmask8)(U))
 
 #define _mm256_maskz_shldi_epi32(U, A, B, I) \
@@ -291,10 +291,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shldi_epi32(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \
-                                          (__v4si)(B), \
+  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(__m128i)(A), \
+                                          (__v4si)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v4si)(S), \
+                                          (__v4si)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shldi_epi32(U, A, B, I) \
@@ -304,10 +304,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm256_mask_shldi_epi16(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
-                                          (__v16hi)(B), \
+  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(__m256i)(A), \
+                                          (__v16hi)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v16hi)(S), \
+                                          (__v16hi)(__m256i)(S), \
                                           (__mmask16)(U))
 
 #define _mm256_maskz_shldi_epi16(U, A, B, I) \
@@ -317,10 +317,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shldi_epi16(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \
-                                          (__v8hi)(B), \
+  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(__m128i)(A), \
+                                          (__v8hi)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v8hi)(S), \
+                                          (__v8hi)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shldi_epi16(U, A, B, I) \
@@ -330,10 +330,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm256_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \
-                                          (__v4di)(B), \
+  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(__m256i)(A), \
+                                          (__v4di)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v4di)(S), \
+                                          (__v4di)(__m256i)(S), \
                                           (__mmask8)(U))
 
 #define _mm256_maskz_shrdi_epi64(U, A, B, I) \
@@ -343,10 +343,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shrdi_epi64(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \
-                                          (__v2di)(B), \
+  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(__m128i)(A), \
+                                          (__v2di)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v2di)(S), \
+                                          (__v2di)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shrdi_epi64(U, A, B, I) \
@@ -356,10 +356,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm256_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \
-                                          (__v8si)(B), \
+  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(__m256i)(A), \
+                                          (__v8si)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v8si)(S), \
+                                          (__v8si)(__m256i)(S), \
                                           (__mmask8)(U))
 
 #define _mm256_maskz_shrdi_epi32(U, A, B, I) \
@@ -369,10 +369,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shrdi_epi32(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \
-                                          (__v4si)(B), \
+  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(__m128i)(A), \
+                                          (__v4si)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v4si)(S), \
+                                          (__v4si)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shrdi_epi32(U, A, B, I) \
@@ -382,10 +382,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm256_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
-                                          (__v16hi)(B), \
+  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(__m256i)(A), \
+                                          (__v16hi)(__m256i)(B), \
                                           (int)(I), \
-                                          (__v16hi)(S), \
+                                          (__v16hi)(__m256i)(S), \
                                           (__mmask16)(U))
 
 #define _mm256_maskz_shrdi_epi16(U, A, B, I) \
@@ -395,10 +395,10 @@ _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
   _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I))
 
 #define _mm_mask_shrdi_epi16(S, U, A, B, I) \
-  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \
-                                          (__v8hi)(B), \
+  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(__m128i)(A), \
+                                          (__v8hi)(__m128i)(B), \
                                           (int)(I), \
-                                          (__v8hi)(S), \
+                                          (__v8hi)(__m128i)(S), \
                                           (__mmask8)(U))
 
 #define _mm_maskz_shrdi_epi16(U, A, B, I) \
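
For illustration, the convention this patch enforces: a macro-implemented intrinsic cannot type-check its arguments the way an inline function would, so each vector argument is cast to the intrinsic's public type (__m128i, __m256d, __m512i, and so on) first and only then reinterpreted as the builtin's internal element-vector type. A minimal sketch of the pattern in C, built around the real __builtin_ia32_selectd_256 builtin used throughout the diff; the macro name my_mask_mov_epi32 and the demo function are hypothetical, not part of this commit:

    #include <immintrin.h>

    /* Hypothetical macro following the header convention: W and A are
       documented as __m256i, so each is cast to (__m256i) before being
       reinterpreted as the builtin's element type (__v8si). A direct
       (__v8si) cast would also compile for any 256-bit vector, but the
       intermediate cast keeps the macro's contract aligned with the
       public prototype, as in the hunks above. */
    #define my_mask_mov_epi32(W, U, A)                           \
      ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U),        \
                                           (__v8si)(__m256i)(A), \
                                           (__v8si)(__m256i)(W)))

    /* Usage sketch; the AVX512VL feature must be enabled at the call site. */
    __attribute__((target("avx512vl")))
    static __m256i demo(__m256i w, __mmask8 u, __m256i a) {
      return my_mask_mov_epi32(w, u, a);  /* behaves like _mm256_mask_mov_epi32 */
    }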