TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f")
TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUci","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUci","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc","","avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc","","avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl")
TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq")
TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
}
#define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1); })
+ (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ ((imm) & 1) ? 8 : 0, \
+ ((imm) & 1) ? 9 : 1, \
+ ((imm) & 1) ? 10 : 2, \
+ ((imm) & 1) ? 11 : 3, \
+ ((imm) & 1) ? 12 : 4, \
+ ((imm) & 1) ? 13 : 5, \
+ ((imm) & 1) ? 14 : 6, \
+ ((imm) & 1) ? 15 : 7); })
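/* Illustrative sketch (not part of the patch itself): for a 16-float vector
 * the immediate picks one of two 256-bit halves, so the shuffle indices are
 * 8*(imm & 1) .. 8*(imm & 1)+7. For example:
 *   __m512 v  = _mm512_setr_ps(0, 1, 2, 3, 4, 5, 6, 7,
 *                              8, 9, 10, 11, 12, 13, 14, 15);
 *   __m256 hi = _mm512_extractf32x8_ps(v, 1);  // elements 8..15
 */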
#define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
+ (__v8sf)(W)); })
#define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \
- (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \
+ (__v8sf)_mm256_setzero_ps()); })
#define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1); })
+ (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ 0 + ((imm) & 0x3) * 2, \
+ 1 + ((imm) & 0x3) * 2); })
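/* Index arithmetic sketch: a 512-bit vector holds four 128-bit pairs of
 * doubles, so pair n occupies elements 2*n and 2*n+1; imm == 3 therefore
 * yields indices 6 and 7, the topmost pair. */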
#define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
+ (__v2df)(W)); })
#define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm512_extractf64x2_pd((A), (imm)), \
+ (__v2df)_mm_setzero_pd()); })
#define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+ (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ ((imm) & 1) ? 8 : 0, \
+ ((imm) & 1) ? 9 : 1, \
+ ((imm) & 1) ? 10 : 2, \
+ ((imm) & 1) ? 11 : 3, \
+ ((imm) & 1) ? 12 : 4, \
+ ((imm) & 1) ? 13 : 5, \
+ ((imm) & 1) ? 14 : 6, \
+ ((imm) & 1) ? 15 : 7); })
#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
+ (__v8si)(W)); })
#define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \
+ (__v8si)_mm256_setzero_si256()); })
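/* Masking sketch (illustrative): both masked forms reuse the unmasked
 * extract and blend lane-by-lane with a select. With U = 0x0F the low four
 * lanes come from the extract while the high four keep W (mask variant) or
 * become zero (maskz variant), e.g.:
 *   __m256i r = _mm512_mask_extracti32x8_epi32(W, 0x0F, A, 1);
 */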
#define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)-1); })
+ (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)_mm512_undefined_epi32(), \
+ 0 + ((imm) & 0x3) * 2, \
+ 1 + ((imm) & 0x3) * 2); })
#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                     (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
+                                     (__v2di)(W)); })
#define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+                                     (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \
+                                     (__v2di)_mm_setzero_di()); })
#define _mm512_insertf32x8(A, B, imm) __extension__ ({ \
(__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \
(__mmask16)(U)); })
/* Vector Extract */
-#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
- (__v4df)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \
+ (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_undefined_pd(), \
+ ((I) & 1) ? 4 : 0, \
+ ((I) & 1) ? 5 : 1, \
+ ((I) & 1) ? 6 : 2, \
+ ((I) & 1) ? 7 : 3); })
#define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
+ (__v4df)(W)); })
#define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\
- (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm512_extractf64x4_pd((A), (imm)), \
+ (__v4df)_mm256_setzero_pd()); })
-#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1); })
+#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \
+ (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_undefined_ps(), \
+ 0 + ((I) & 0x3) * 4, \
+ 1 + ((I) & 0x3) * 4, \
+ 2 + ((I) & 0x3) * 4, \
+ 3 + ((I) & 0x3) * 4); })
#define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
+ (__v4sf)(W)); })
#define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\
- (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \
+ (__v4sf)_mm_setzero_ps()); })
+
/* Vector Blend */
static __inline __m512d __DEFAULT_FN_ATTRS
__builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
}
-#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_undefined_si128(), \
- (__mmask8)-1); })
+#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \
+ (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \
+ (__v16si)_mm512_undefined_epi32(), \
+ 0 + ((imm) & 0x3) * 4, \
+ 1 + ((imm) & 0x3) * 4, \
+ 2 + ((imm) & 0x3) * 4, \
+ 3 + ((imm) & 0x3) * 4); })
#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+                                     (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
+                                     (__v4si)(W)); })
#define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+                                     (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \
+                                     (__v4si)_mm_setzero_si128()); })
-#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_undefined_si256(), \
- (__mmask8)-1); })
+#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \
+ (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)_mm512_undefined_epi32(), \
+ ((imm) & 1) ? 4 : 0, \
+ ((imm) & 1) ? 5 : 1, \
+ ((imm) & 1) ? 6 : 2, \
+ ((imm) & 1) ? 7 : 3); })
#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                     (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
+                                     (__v4di)(W)); })
#define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+                                     (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \
+                                     (__v4di)_mm256_setzero_si256()); })
#define _mm512_insertf64x4(A, B, imm) __extension__ ({ \
(__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \
}
#define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1); })
+ (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)_mm256_undefined_pd(), \
+ ((imm) & 1) ? 2 : 0, \
+ ((imm) & 1) ? 3 : 1); })
#define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)(__m128d)(W), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+ (__v2df)(W)); })
#define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \
- (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
- (int)(imm), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
+ (__v2df)_mm256_extractf64x2_pd((A), (imm)), \
+ (__v2df)_mm_setzero_pd()); })
#define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)-1); })
+ (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+ (__v4di)_mm256_undefined_si256(), \
+ ((imm) & 1) ? 2 : 0, \
+ ((imm) & 1) ? 3 : 1); })
#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+ (__v2di)(W)); })
#define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
- (int)(imm), \
- (__v2di)_mm_setzero_di(), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
+ (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \
+ (__v2di)_mm_setzero_di()); })
#define _mm256_insertf64x2(A, B, imm) __extension__ ({ \
(__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \
}
#define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1); })
+ (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \
+ (__v8sf)_mm256_undefined_ps(), \
+ ((imm) & 1) ? 4 : 0, \
+ ((imm) & 1) ? 5 : 1, \
+ ((imm) & 1) ? 6 : 2, \
+ ((imm) & 1) ? 7 : 3); })
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)(__m128)(W), \
- (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
+ (__v4sf)(W)); })
#define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \
- (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
- (int)(imm), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
+ (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \
+ (__v4sf)_mm_setzero_ps()); })
#define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)-1); })
+ (__m128i)__builtin_shufflevector((__v8si)(__m256i)(A), \
+ (__v8si)_mm256_undefined_si256(), \
+ ((imm) & 1) ? 4 : 0, \
+ ((imm) & 1) ? 5 : 1, \
+ ((imm) & 1) ? 6 : 2, \
+ ((imm) & 1) ? 7 : 3); })
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)(__m128i)(W), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
+ (__v4si)(W)); })
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \
- (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
- (int)(imm), \
- (__v4si)_mm_setzero_si128(), \
- (__mmask8)(U)); })
+ (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
+ (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \
+ (__v4si)_mm_setzero_si128()); })
#define _mm256_insertf32x4(A, B, imm) __extension__ ({ \
(__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \
switch (BuiltinID) {
default:
return false;
- case X86::BI__builtin_ia32_extractf64x4_mask:
- case X86::BI__builtin_ia32_extracti64x4_mask:
- case X86::BI__builtin_ia32_extractf32x8_mask:
- case X86::BI__builtin_ia32_extracti32x8_mask:
- case X86::BI__builtin_ia32_extractf64x2_256_mask:
- case X86::BI__builtin_ia32_extracti64x2_256_mask:
- case X86::BI__builtin_ia32_extractf32x4_256_mask:
- case X86::BI__builtin_ia32_extracti32x4_256_mask:
- i = 1; l = 0; u = 1;
- break;
case X86::BI_mm_prefetch:
- case X86::BI__builtin_ia32_extractf32x4_mask:
- case X86::BI__builtin_ia32_extracti32x4_mask:
- case X86::BI__builtin_ia32_extractf64x2_512_mask:
- case X86::BI__builtin_ia32_extracti64x2_512_mask:
i = 1; l = 0; u = 3;
break;
case X86::BI__builtin_ia32_insertf32x8_mask:
}
__m256 test_mm512_extractf32x8_ps(__m512 __A) {
// CHECK-LABEL: @test_mm512_extractf32x8_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extractf32x8_ps(__A, 1);
}
__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_mask_extractf32x8_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1);
}
__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) {
// CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x8
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm512_maskz_extractf32x8_ps(__U, __A, 1);
}
__m128d test_mm512_extractf64x2_pd(__m512d __A) {
// CHECK-LABEL: @test_mm512_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
return _mm512_extractf64x2_pd(__A, 3);
}
__m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_mask_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3);
}
__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) {
// CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm512_maskz_extractf64x2_pd(__U, __A, 3);
}
__m256i test_mm512_extracti32x8_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti32x8_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x8_epi32(__A, 1);
}
__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1);
}
__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x8
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm512_maskz_extracti32x8_epi32(__U, __A, 1);
}
__m128i test_mm512_extracti64x2_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
return _mm512_extracti64x2_epi64(__A, 3);
}
__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3);
}
__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm512_maskz_extracti64x2_epi64(__U, __A, 3);
}
__m256d test_mm512_extractf64x4_pd(__m512d a)
{
// CHECK-LABEL: @test_mm512_extractf64x4_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x4.512
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf64x4_pd(a, 1);
}
__m256d test_mm512_mask_extractf64x4_pd(__m256d __W, __mmask8 __U, __m512d __A) {
- //CHECK-LABEL:@test_mm512_mask_extractf64x4_pd
- //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
- return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1);
+ // CHECK-LABEL: @test_mm512_mask_extractf64x4_pd
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm512_mask_extractf64x4_pd(__W, __U, __A, 1);
}
__m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U, __m512d __A) {
- //CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd
- //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512
- return _mm512_maskz_extractf64x4_pd( __U, __A, 1);
+ // CHECK-LABEL: @test_mm512_maskz_extractf64x4_pd
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
+ return _mm512_maskz_extractf64x4_pd(__U, __A, 1);
}
__m128 test_mm512_extractf32x4_ps(__m512 a)
{
// CHECK-LABEL: @test_mm512_extractf32x4_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x4.512
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extractf32x4_ps(a, 1);
}
__m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m512 __A) {
- //CHECK-LABEL:@test_mm512_mask_extractf32x4_ps
- //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
- return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1);
+ // CHECK-LABEL: @test_mm512_mask_extractf32x4_ps
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
+ return _mm512_mask_extractf32x4_ps(__W, __U, __A, 1);
}
__m128 test_mm512_maskz_extractf32x4_ps(__mmask8 __U, __m512 __A) {
- //CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps
- //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512
+ // CHECK-LABEL: @test_mm512_maskz_extractf32x4_ps
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return _mm512_maskz_extractf32x4_ps(__U, __A, 1);
}
__m128i test_mm512_extracti32x4_epi32(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
return _mm512_extracti32x4_epi32(__A, 3);
}
__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3);
}
__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm512_maskz_extracti32x4_epi32(__U, __A, 3);
}
__m256i test_mm512_extracti64x4_epi64(__m512i __A) {
// CHECK-LABEL: @test_mm512_extracti64x4_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm512_extracti64x4_epi64(__A, 1);
}
__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1);
}
__m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) {
// CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm512_maskz_extracti64x4_epi64(__U, __A, 1);
}
__m128 test_mm256_extractf32x4_ps(__m256 __A) {
// CHECK-LABEL: @test_mm256_extractf32x4_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extractf32x4_ps(__A, 1);
}
__m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_mask_extractf32x4_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1);
}
__m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) {
// CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps
- // CHECK: @llvm.x86.avx512.mask.vextractf32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm256_maskz_extractf32x4_ps(__U, __A, 1);
}
__m128i test_mm256_extracti32x4_epi32(__m256i __A) {
// CHECK-LABEL: @test_mm256_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
return _mm256_extracti32x4_epi32(__A, 1);
}
__m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1);
}
__m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32
- // CHECK: @llvm.x86.avx512.mask.vextracti32x4
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
return _mm256_maskz_extracti32x4_epi32(__U, __A, 1);
}
__m128d test_mm256_extractf64x2_pd(__m256d __A) {
// CHECK-LABEL: @test_mm256_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extractf64x2_pd(__A, 1);
}
__m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1);
}
__m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
// CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
- // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> <i32 2, i32 3>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm256_maskz_extractf64x2_pd(__U, __A, 1);
}
__m128i test_mm256_extracti64x2_epi64(__m256i __A) {
// CHECK-LABEL: @test_mm256_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
return _mm256_extracti64x2_epi64(__A, 1);
}
__m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1);
}
__m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
// CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
- // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
return _mm256_maskz_extracti64x2_epi64(__U, __A, 1);
}