TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_512_mask, "V8LLiV8LLiV2LLiIiV8LLiUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x4_mask, "V8dV8dV4dIiV8dUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti64x4_mask, "V8LLiV8LLiV4LLiIiV8LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_256_mask, "V4dV4dV2dIiV4dUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_256_mask, "V4LLiV4LLiV2LLiIiV4LLiUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4_256_mask, "V8fV8fV4fIiV8fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4_256_mask, "V8iV8iV4iIiV8iUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4_mask, "V16fV16fV4fIiV16fUs","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4_mask, "V16iV16iV4iIiV16iUs","","avx512f")
TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc","","avx512vl")
(__v2di)_mm_setzero_di()); })
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1); })
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_castps256_ps512((__m256)(B)), \
+ ((imm) & 0x1) ? 0 : 16, \
+ ((imm) & 0x1) ? 1 : 17, \
+ ((imm) & 0x1) ? 2 : 18, \
+ ((imm) & 0x1) ? 3 : 19, \
+ ((imm) & 0x1) ? 4 : 20, \
+ ((imm) & 0x1) ? 5 : 21, \
+ ((imm) & 0x1) ? 6 : 22, \
+ ((imm) & 0x1) ? 7 : 23, \
+ ((imm) & 0x1) ? 16 : 8, \
+ ((imm) & 0x1) ? 17 : 9, \
+ ((imm) & 0x1) ? 18 : 10, \
+ ((imm) & 0x1) ? 19 : 11, \
+ ((imm) & 0x1) ? 20 : 12, \
+ ((imm) & 0x1) ? 21 : 13, \
+ ((imm) & 0x1) ? 22 : 14, \
+ ((imm) & 0x1) ? 23 : 15); })
#define _mm512_mask_insertf32x8(W, U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
+ (__v16sf)(W)); })
#define _mm512_maskz_insertf32x8(U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
+ (__v16sf)_mm512_setzero_ps()); })
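For reference, a minimal standalone sketch of what the unmasked form computes (the values and the use of main here are illustrative, not part of the patch; assumes an AVX-512DQ toolchain and hardware, e.g. cc -mavx512f -mavx512dq): bit 0 of imm selects which 256-bit half of A is replaced by B.

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  float a[16], b[8], r[16];
  for (int i = 0; i < 16; ++i) a[i] = (float)i;         // A = 0..15
  for (int i = 0; i < 8; ++i)  b[i] = (float)(100 + i); // B = 100..107
  __m512 va = _mm512_loadu_ps(a);
  __m256 vb = _mm256_loadu_ps(b);
  // imm = 1: B replaces elements 8..15 (the upper 256 bits) of A.
  _mm512_storeu_ps(r, _mm512_insertf32x8(va, vb, 1));
  for (int i = 0; i < 16; ++i)
    printf("%g ", r[i]);   // prints 0..7 then 100..107
  printf("\n");
  return 0;
}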
#define _mm512_insertf64x2(A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1); })
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_castpd128_pd512((__m128d)(B)), \
+ (((imm) & 0x3) == 0) ? 8 : 0, \
+ (((imm) & 0x3) == 0) ? 9 : 1, \
+ (((imm) & 0x3) == 1) ? 8 : 2, \
+ (((imm) & 0x3) == 1) ? 9 : 3, \
+ (((imm) & 0x3) == 2) ? 8 : 4, \
+ (((imm) & 0x3) == 2) ? 9 : 5, \
+ (((imm) & 0x3) == 3) ? 8 : 6, \
+ (((imm) & 0x3) == 3) ? 9 : 7); })
#define _mm512_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
+ (__v8df)(W)); })
#define _mm512_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x2_512_mask((__v8df)(__m512d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
+ (__v8df)_mm512_setzero_pd()); })
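The 2-bit immediate variants follow the same pattern with four 128-bit lanes; as a scalar sketch (insertf64x2_ref is a hypothetical reference helper, not code from the patch), the ternary index lists above reduce to:

// Scalar model of the new _mm512_insertf64x2 lowering: copy A, then
// overwrite the 128-bit lane selected by imm & 0x3 with B.
static void insertf64x2_ref(double dst[8], const double a[8],
                            const double b[2], int imm) {
  for (int i = 0; i < 8; ++i) dst[i] = a[i];
  int lane = imm & 0x3;        // four 128-bit lanes in a 512-bit vector
  dst[2 * lane]     = b[0];    // each lane holds two doubles
  dst[2 * lane + 1] = b[1];
}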
#define _mm512_inserti32x8(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
- (__v8si)(__m256i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1); })
+ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_castsi256_si512((__m256i)(B)), \
+ ((imm) & 0x1) ? 0 : 16, \
+ ((imm) & 0x1) ? 1 : 17, \
+ ((imm) & 0x1) ? 2 : 18, \
+ ((imm) & 0x1) ? 3 : 19, \
+ ((imm) & 0x1) ? 4 : 20, \
+ ((imm) & 0x1) ? 5 : 21, \
+ ((imm) & 0x1) ? 6 : 22, \
+ ((imm) & 0x1) ? 7 : 23, \
+ ((imm) & 0x1) ? 16 : 8, \
+ ((imm) & 0x1) ? 17 : 9, \
+ ((imm) & 0x1) ? 18 : 10, \
+ ((imm) & 0x1) ? 19 : 11, \
+ ((imm) & 0x1) ? 20 : 12, \
+ ((imm) & 0x1) ? 21 : 13, \
+ ((imm) & 0x1) ? 22 : 14, \
+ ((imm) & 0x1) ? 23 : 15); })
#define _mm512_mask_inserti32x8(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
- (__v8si)(__m256i)(B), (int)(imm), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
+ (__v16si)(W)); })
#define _mm512_maskz_inserti32x8(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x8_mask((__v16si)(__m512i)(A), \
- (__v8si)(__m256i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()); })
#define _mm512_inserti64x2(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1); })
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)_mm512_castsi128_si512((__m128i)(B)), \
+ (((imm) & 0x3) == 0) ? 8 : 0, \
+ (((imm) & 0x3) == 0) ? 9 : 1, \
+ (((imm) & 0x3) == 1) ? 8 : 2, \
+ (((imm) & 0x3) == 1) ? 9 : 3, \
+ (((imm) & 0x3) == 2) ? 8 : 4, \
+ (((imm) & 0x3) == 2) ? 9 : 5, \
+ (((imm) & 0x3) == 3) ? 8 : 6, \
+ (((imm) & 0x3) == 3) ? 9 : 7); })
#define _mm512_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
+ (__v8di)(W)); })
#define _mm512_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x2_512_mask((__v8di)(__m512i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()); })
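All the mask/maskz forms in this patch share one shape: compute the unmasked insert, then route it through a select builtin against either W or zero. As a scalar sketch of what the select builtins amount to here (mask_insert_ref is a hypothetical helper, not code from the patch):

// Per element: mask bit set takes the insert result, mask bit clear takes
// the passthrough (W for the mask form, zero for the maskz form).
static void mask_insert_ref(double dst[8], const double ins[8],
                            const double passthru[8], unsigned char u) {
  for (int i = 0; i < 8; ++i)
    dst[i] = ((u >> i) & 1) ? ins[i] : passthru[i];
}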
#define _mm512_mask_fpclass_ps_mask(U, A, imm) __extension__ ({ \
(__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
(__v4di)_mm256_setzero_si256()); })
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
- (__v4df)(__m256d)(B), (int)(imm), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_castpd256_pd512((__m256d)(B)), \
+ ((imm) & 0x1) ? 0 : 8, \
+ ((imm) & 0x1) ? 1 : 9, \
+ ((imm) & 0x1) ? 2 : 10, \
+ ((imm) & 0x1) ? 3 : 11, \
+ ((imm) & 0x1) ? 8 : 4, \
+ ((imm) & 0x1) ? 9 : 5, \
+ ((imm) & 0x1) ? 10 : 6, \
+ ((imm) & 0x1) ? 11 : 7); })
#define _mm512_mask_insertf64x4(W, U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
- (__v4df)(__m256d)(B), (int)(imm), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
+ (__v8df)(W)); })
#define _mm512_maskz_insertf64x4(U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
- (__v4df)(__m256d)(B), (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_inserti64x4(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
- (__v4di)(__m256i)(B), (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1); })
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)_mm512_castsi256_si512((__m256i)(B)), \
+ ((imm) & 0x1) ? 0 : 8, \
+ ((imm) & 0x1) ? 1 : 9, \
+ ((imm) & 0x1) ? 2 : 10, \
+ ((imm) & 0x1) ? 3 : 11, \
+ ((imm) & 0x1) ? 8 : 4, \
+ ((imm) & 0x1) ? 9 : 5, \
+ ((imm) & 0x1) ? 10 : 6, \
+ ((imm) & 0x1) ? 11 : 7); })
#define _mm512_mask_inserti64x4(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
- (__v4di)(__m256i)(B), (int)(imm), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
+ (__v8di)(W)); })
#define _mm512_maskz_inserti64x4(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti64x4_mask((__v8di)(__m512i)(A), \
- (__v4di)(__m256i)(B), (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()); })
#define _mm512_insertf32x4(A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1); })
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_castps128_ps512((__m128)(B)), \
+ (((imm) & 0x3) == 0) ? 16 : 0, \
+ (((imm) & 0x3) == 0) ? 17 : 1, \
+ (((imm) & 0x3) == 0) ? 18 : 2, \
+ (((imm) & 0x3) == 0) ? 19 : 3, \
+ (((imm) & 0x3) == 1) ? 16 : 4, \
+ (((imm) & 0x3) == 1) ? 17 : 5, \
+ (((imm) & 0x3) == 1) ? 18 : 6, \
+ (((imm) & 0x3) == 1) ? 19 : 7, \
+ (((imm) & 0x3) == 2) ? 16 : 8, \
+ (((imm) & 0x3) == 2) ? 17 : 9, \
+ (((imm) & 0x3) == 2) ? 18 : 10, \
+ (((imm) & 0x3) == 2) ? 19 : 11, \
+ (((imm) & 0x3) == 3) ? 16 : 12, \
+ (((imm) & 0x3) == 3) ? 17 : 13, \
+ (((imm) & 0x3) == 3) ? 18 : 14, \
+ (((imm) & 0x3) == 3) ? 19 : 15); })
#define _mm512_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
+ (__v16sf)(W)); })
#define _mm512_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_insertf32x4_mask((__v16sf)(__m512)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_inserti32x4(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
- (__v4si)(__m128i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1); })
+ (__m512i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_castsi128_si512((__m128i)(B)), \
+ (((imm) & 0x3) == 0) ? 16 : 0, \
+ (((imm) & 0x3) == 0) ? 17 : 1, \
+ (((imm) & 0x3) == 0) ? 18 : 2, \
+ (((imm) & 0x3) == 0) ? 19 : 3, \
+ (((imm) & 0x3) == 1) ? 16 : 4, \
+ (((imm) & 0x3) == 1) ? 17 : 5, \
+ (((imm) & 0x3) == 1) ? 18 : 6, \
+ (((imm) & 0x3) == 1) ? 19 : 7, \
+ (((imm) & 0x3) == 2) ? 16 : 8, \
+ (((imm) & 0x3) == 2) ? 17 : 9, \
+ (((imm) & 0x3) == 2) ? 18 : 10, \
+ (((imm) & 0x3) == 2) ? 19 : 11, \
+ (((imm) & 0x3) == 3) ? 16 : 12, \
+ (((imm) & 0x3) == 3) ? 17 : 13, \
+ (((imm) & 0x3) == 3) ? 18 : 14, \
+ (((imm) & 0x3) == 3) ? 19 : 15); })
#define _mm512_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
- (__v4si)(__m128i)(B), (int)(imm), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
+ (__v16si)(W)); })
#define _mm512_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_inserti32x4_mask((__v16si)(__m512i)(A), \
- (__v4si)(__m128i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()); })
#define _mm512_getmant_round_pd(A, B, C, R) __extension__ ({ \
(__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
(__v2di)_mm_setzero_di()); })
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1); })
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)_mm256_castpd128_pd256((__m128d)(B)), \
+ ((imm) & 0x1) ? 0 : 4, \
+ ((imm) & 0x1) ? 1 : 5, \
+ ((imm) & 0x1) ? 4 : 2, \
+ ((imm) & 0x1) ? 5 : 3); })
#define _mm256_mask_insertf64x2(W, U, A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
+ (__v4df)(W)); })
#define _mm256_maskz_insertf64x2(U, A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
- (__v2df)(__m128d)(B), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
+ (__v4df)_mm256_setzero_pd()); })
#define _mm256_inserti64x2(A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+ (__v4di)_mm256_castsi128_si256((__m128i)(B)), \
+ ((imm) & 0x1) ? 0 : 4, \
+ ((imm) & 0x1) ? 1 : 5, \
+ ((imm) & 0x1) ? 4 : 2, \
+ ((imm) & 0x1) ? 5 : 3); })
#define _mm256_mask_inserti64x2(W, U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v4di)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
+ (__v4di)(W)); })
#define _mm256_maskz_inserti64x2(U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti64x2_256_mask((__v4di)(__m256i)(A), \
- (__v2di)(__m128i)(B), \
- (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
+ (__v4di)_mm256_setzero_si256()); })
#define _mm_mask_fpclass_pd_mask(U, A, imm) __extension__ ({ \
(__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
(__v4si)_mm_setzero_si128()); })
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1); })
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
+ (__v8sf)_mm256_castps128_ps256((__m128)(B)), \
+ ((imm) & 0x1) ? 0 : 8, \
+ ((imm) & 0x1) ? 1 : 9, \
+ ((imm) & 0x1) ? 2 : 10, \
+ ((imm) & 0x1) ? 3 : 11, \
+ ((imm) & 0x1) ? 8 : 4, \
+ ((imm) & 0x1) ? 9 : 5, \
+ ((imm) & 0x1) ? 10 : 6, \
+ ((imm) & 0x1) ? 11 : 7); })
#define _mm256_mask_insertf32x4(W, U, A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
+ (__v8sf)(W)); })
#define _mm256_maskz_insertf32x4(U, A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
- (__v4sf)(__m128)(B), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
+ (__v8sf)_mm256_setzero_ps()); })
#define _mm256_inserti32x4(A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
- (__v4si)(__m128i)(B), \
- (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+ (__m256i)__builtin_shufflevector((__v8si)(__m256i)(A), \
+ (__v8si)_mm256_castsi128_si256((__m128i)(B)), \
+ ((imm) & 0x1) ? 0 : 8, \
+ ((imm) & 0x1) ? 1 : 9, \
+ ((imm) & 0x1) ? 2 : 10, \
+ ((imm) & 0x1) ? 3 : 11, \
+ ((imm) & 0x1) ? 8 : 4, \
+ ((imm) & 0x1) ? 9 : 5, \
+ ((imm) & 0x1) ? 10 : 6, \
+ ((imm) & 0x1) ? 11 : 7); })
#define _mm256_mask_inserti32x4(W, U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
- (__v4si)(__m128i)(B), \
- (int)(imm), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
+ (__v8si)(W)); })
#define _mm256_maskz_inserti32x4(U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_inserti32x4_256_mask((__v8si)(__m256i)(A), \
- (__v4si)(__m128i)(B), \
- (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
+ (__v8si)_mm256_setzero_si256()); })
#define _mm_getmant_pd(A, B, C) __extension__({\
(__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
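The 256-bit forms are used the same way as the 512-bit ones; a hedged usage sketch (demo_mask and demo_maskz are illustrative names; assumes -mavx512vl -mavx512dq):

#include <immintrin.h>

// U = 0x5 keeps the insert result in elements 0 and 2 and takes the
// passthrough (W, or zero for the maskz form) in elements 1 and 3.
__m256d demo_mask(__m256d w, __m256d a, __m128d b) {
  return _mm256_mask_insertf64x2(w, 0x5, a, b, 1);
}

__m256d demo_maskz(__m256d a, __m128d b) {
  return _mm256_maskz_insertf64x2(0x5, a, b, 1);
}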
case X86::BI_mm_prefetch:
i = 1; l = 0; u = 3;
break;
- case X86::BI__builtin_ia32_insertf32x8_mask:
- case X86::BI__builtin_ia32_inserti32x8_mask:
- case X86::BI__builtin_ia32_insertf64x4_mask:
- case X86::BI__builtin_ia32_inserti64x4_mask:
- case X86::BI__builtin_ia32_insertf64x2_256_mask:
- case X86::BI__builtin_ia32_inserti64x2_256_mask:
- case X86::BI__builtin_ia32_insertf32x4_256_mask:
- case X86::BI__builtin_ia32_inserti32x4_256_mask:
- i = 2; l = 0; u = 1;
- break;
case X86::BI__builtin_ia32_sha1rnds4:
case X86::BI__builtin_ia32_shuf_f32x4_256_mask:
case X86::BI__builtin_ia32_shuf_f64x2_256_mask:
case X86::BI__builtin_ia32_shuf_i32x4_256_mask:
case X86::BI__builtin_ia32_shuf_i64x2_256_mask:
- case X86::BI__builtin_ia32_insertf64x2_512_mask:
- case X86::BI__builtin_ia32_inserti64x2_512_mask:
- case X86::BI__builtin_ia32_insertf32x4_mask:
- case X86::BI__builtin_ia32_inserti32x4_mask:
i = 2; l = 0; u = 3;
break;
case X86::BI__builtin_ia32_vpermil2pd:
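With the builtins gone, these immediates are no longer range-checked in Sema; the macro expansions mask them instead (& 0x1 for the two-position forms, & 0x3 for the four-position forms), so an out-of-range value now wraps rather than producing a diagnostic. An illustrative consequence (wrap_demo is a hypothetical name):

// 5 & 0x3 == 1, so after this patch these two immediates select the same
// lane instead of being rejected with an out-of-range error.
__m512i wrap_demo(__m512i a, __m128i b) {
  return _mm512_inserti64x2(a, b, 5);   // behaves like imm == 1
}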
__m512 test_mm512_insertf32x8(__m512 __A, __m256 __B) {
// CHECK-LABEL: @test_mm512_insertf32x8
- // CHECK: @llvm.x86.avx512.mask.insertf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
return _mm512_insertf32x8(__A, __B, 1);
}
__m512 test_mm512_mask_insertf32x8(__m512 __W, __mmask16 __U, __m512 __A, __m256 __B) {
// CHECK-LABEL: @test_mm512_mask_insertf32x8
- // CHECK: @llvm.x86.avx512.mask.insertf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_insertf32x8(__W, __U, __A, __B, 1);
}
__m512 test_mm512_maskz_insertf32x8(__mmask16 __U, __m512 __A, __m256 __B) {
// CHECK-LABEL: @test_mm512_maskz_insertf32x8
- // CHECK: @llvm.x86.avx512.mask.insertf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_insertf32x8(__U, __A, __B, 1);
}
__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) {
// CHECK-LABEL: @test_mm512_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
return _mm512_insertf64x2(__A, __B, 3);
}
__m512d test_mm512_mask_insertf64x2(__m512d __W, __mmask8 __U, __m512d __A, __m128d __B) {
// CHECK-LABEL: @test_mm512_mask_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_insertf64x2(__W, __U, __A, __B, 3);
}
__m512d test_mm512_maskz_insertf64x2(__mmask8 __U, __m512d __A, __m128d __B) {
// CHECK-LABEL: @test_mm512_maskz_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_insertf64x2(__U, __A, __B, 3);
}
__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_inserti32x8
- // CHECK: @llvm.x86.avx512.mask.inserti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
return _mm512_inserti32x8(__A, __B, 1);
}
__m512i test_mm512_mask_inserti32x8(__m512i __W, __mmask16 __U, __m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_mask_inserti32x8
- // CHECK: @llvm.x86.avx512.mask.inserti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_inserti32x8(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti32x8(__mmask16 __U, __m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_maskz_inserti32x8
- // CHECK: @llvm.x86.avx512.mask.inserti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_inserti32x8(__U, __A, __B, 1);
}
__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
return _mm512_inserti64x2(__A, __B, 1);
}
__m512i test_mm512_mask_inserti64x2(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_mask_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_inserti64x2(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti64x2(__mmask8 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_maskz_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_inserti64x2(__U, __A, __B, 1);
}
__mmask8 test_mm512_mask_fpclass_pd_mask(__mmask8 __U, __m512d __A) {
__m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) {
// CHECK-LABEL: @test_mm512_insertf64x4
- // CHECK: @llvm.x86.avx512.mask.insertf64x4
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm512_insertf64x4(__A, __B, 1);
}
__m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m256d __B) {
// CHECK-LABEL: @test_mm512_mask_insertf64x4
- // CHECK: @llvm.x86.avx512.mask.insertf64x4
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_insertf64x4(__W, __U, __A, __B, 1);
}
__m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) {
// CHECK-LABEL: @test_mm512_maskz_insertf64x4
- // CHECK: @llvm.x86.avx512.mask.insertf64x4
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_insertf64x4(__U, __A, __B, 1);
}
__m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_inserti64x4
- // CHECK: @llvm.x86.avx512.mask.inserti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm512_inserti64x4(__A, __B, 1);
}
__m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_mask_inserti64x4
- // CHECK: @llvm.x86.avx512.mask.inserti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_inserti64x4(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) {
// CHECK-LABEL: @test_mm512_maskz_inserti64x4
- // CHECK: @llvm.x86.avx512.mask.inserti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_inserti64x4(__U, __A, __B, 1);
}
__m512 test_mm512_insertf32x4(__m512 __A, __m128 __B) {
// CHECK-LABEL: @test_mm512_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_insertf32x4(__A, __B, 1);
}
__m512 test_mm512_mask_insertf32x4(__m512 __W, __mmask16 __U, __m512 __A, __m128 __B) {
// CHECK-LABEL: @test_mm512_mask_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_insertf32x4(__W, __U, __A, __B, 1);
}
__m512 test_mm512_maskz_insertf32x4(__mmask16 __U, __m512 __A, __m128 __B) {
// CHECK-LABEL: @test_mm512_maskz_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_insertf32x4(__U, __A, __B, 1);
}
__m512i test_mm512_inserti32x4(__m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_inserti32x4(__A, __B, 1);
}
__m512i test_mm512_mask_inserti32x4(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_mask_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_inserti32x4(__W, __U, __A, __B, 1);
}
__m512i test_mm512_maskz_inserti32x4(__mmask16 __U, __m512i __A, __m128i __B) {
// CHECK-LABEL: @test_mm512_maskz_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_inserti32x4(__U, __A, __B, 1);
}
__m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) {
// CHECK-LABEL: @test_mm256_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_insertf32x4(__A, __B, 1);
}
__m256 test_mm256_mask_insertf32x4(__m256 __W, __mmask8 __U, __m256 __A, __m128 __B) {
// CHECK-LABEL: @test_mm256_mask_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_insertf32x4(__W, __U, __A, __B, 1);
}
__m256 test_mm256_maskz_insertf32x4(__mmask8 __U, __m256 __A, __m128 __B) {
// CHECK-LABEL: @test_mm256_maskz_insertf32x4
- // CHECK: @llvm.x86.avx512.mask.insertf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_insertf32x4(__U, __A, __B, 1);
}
__m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_inserti32x4(__A, __B, 1);
}
__m256i test_mm256_mask_inserti32x4(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_mask_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_inserti32x4(__W, __U, __A, __B, 1);
}
__m256i test_mm256_maskz_inserti32x4(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_maskz_inserti32x4
- // CHECK: @llvm.x86.avx512.mask.inserti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_inserti32x4(__U, __A, __B, 1);
}
__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
// CHECK-LABEL: @test_mm256_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm256_insertf64x2(__A, __B, 1);
}
__m256d test_mm256_mask_insertf64x2(__m256d __W, __mmask8 __U, __m256d __A, __m128d __B) {
// CHECK-LABEL: @test_mm256_mask_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_insertf64x2(__W, __U, __A, __B, 1);
}
__m256d test_mm256_maskz_insertf64x2(__mmask8 __U, __m256d __A, __m128d __B) {
// CHECK-LABEL: @test_mm256_maskz_insertf64x2
- // CHECK: @llvm.x86.avx512.mask.insertf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_insertf64x2(__U, __A, __B, 1);
}
__m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
return _mm256_inserti64x2(__A, __B, 1);
}
__m256i test_mm256_mask_inserti64x2(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_mask_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_inserti64x2(__W, __U, __A, __B, 1);
}
__m256i test_mm256_maskz_inserti64x2(__mmask8 __U, __m256i __A, __m128i __B) {
// CHECK-LABEL: @test_mm256_maskz_inserti64x2
- // CHECK: @llvm.x86.avx512.mask.inserti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_inserti64x2(__U, __A, __B, 1);
}