From cbebc3b2e70a27da2134841da0f72e404b699ecf Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 31 Oct 2016 04:30:56 +0000 Subject: [PATCH] [AVX-512] Remove masked vector extract builtins and replace with native shufflevectors and selects. Unfortunately, the backend currently doesn't fold masks into the instructions correctly when they come from these shufflevectors. I'll work on that in a future commit. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@285540 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 12 ---- lib/Headers/avx512dqintrin.h | 94 ++++++++++++++++------------- lib/Headers/avx512fintrin.h | 93 ++++++++++++++++------------ lib/Headers/avx512vldqintrin.h | 44 ++++++-------- lib/Headers/avx512vlintrin.h | 48 +++++++-------- lib/Sema/SemaChecking.cpp | 14 ----- test/CodeGen/avx512dq-builtins.c | 32 ++++++---- test/CodeGen/avx512f-builtins.c | 46 ++++++++------ test/CodeGen/avx512vl-builtins.c | 16 +++-- test/CodeGen/avx512vldq-builtins.c | 16 +++-- 10 files changed, 216 insertions(+), 199 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index e028b3b48a..ba1d51dfee 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1004,8 +1004,6 @@ TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "", "avx512f") -TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "", "avx512f") TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUci","","avx512vl") TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUci","","avx512vl") @@ -1727,16 +1725,6 @@ TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc","","avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_insertf32x8_mask, "V16fV16fV8fIiV16fUs","","avx512dq") TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq") TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq") diff --git a/lib/Headers/avx512dqintrin.h b/lib/Headers/avx512dqintrin.h index a24a91cf7c..c5ae250897 100644 --- a/lib/Headers/avx512dqintrin.h +++ b/lib/Headers/avx512dqintrin.h @@ -1116,70 +1116,80 @@ _mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) } #define _mm512_extractf32x8_ps(A, imm) __extension__ ({ \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)-1); }) + (__m256)__builtin_shufflevector((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_undefined_ps(), \ + ((imm) & 1) ? 8 : 0, \ + ((imm) & 1) ? 9 : 1, \ + ((imm) & 1) ? 10 : 2, \ + ((imm) & 1) ? 11 : 3, \ + ((imm) & 1) ? 12 : 4, \ + ((imm) & 1) ? 13 : 5, \ + ((imm) & 1) ? 14 : 6, \ + ((imm) & 1) ? 15 : 7); }) #define _mm512_mask_extractf32x8_ps(W, U, A, imm) __extension__ ({ \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)(__m256)(W), \ - (__mmask8)(U)); }) + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ + (__v8sf)(W)); }) #define _mm512_maskz_extractf32x8_ps(U, A, imm) __extension__ ({ \ - (__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v8sf)_mm256_setzero_ps(), \ - (__mmask8)(U)); }) + (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm512_extractf32x8_ps((A), (imm)), \ + (__v8sf)_mm256_setzero_ps()); }) #define _mm512_extractf64x2_pd(A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__m128d)__builtin_shufflevector((__v8df)(__m512d)(A), \ + (__v8df)_mm512_undefined_pd(), \ + 0 + ((imm) & 0x3) * 2, \ + 1 + ((imm) & 0x3) * 2); }) #define _mm512_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ + (__v2df)(W)); }) #define _mm512_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm512_extractf64x2_pd((A), (imm)), \ + (__v2df)_mm_setzero_pd()); }) #define _mm512_extracti32x8_epi32(A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)-1); }) + (__m256i)__builtin_shufflevector((__v16si)(__m512i)(A), \ + (__v16si)_mm512_undefined_epi32(), \ + ((imm) & 1) ? 8 : 0, \ + ((imm) & 1) ? 9 : 1, \ + ((imm) & 1) ? 10 : 2, \ + ((imm) & 1) ? 11 : 3, \ + ((imm) & 1) ? 12 : 4, \ + ((imm) & 1) ? 13 : 5, \ + ((imm) & 1) ? 14 : 6, \ + ((imm) & 1) ? 15 : 7); }) #define _mm512_mask_extracti32x8_epi32(W, U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)(__m256i)(W), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ + (__v8si)(W)); }) #define _mm512_maskz_extracti32x8_epi32(U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v8si)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ + (__v8si)_mm512_extracti32x8_epi32((A), (imm)), \ + (__v8si)_mm256_setzero_si256()); }) #define _mm512_extracti64x2_epi64(A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)-1); }) + (__m128i)__builtin_shufflevector((__v8di)(__m512i)(A), \ + (__v8di)_mm512_undefined_epi32(), \ + 0 + ((imm) & 0x3) * 2, \ + 1 + ((imm) & 0x3) * 2); }) #define _mm512_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ + (__v2di)(W)); }) #define _mm512_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm512_extracti64x2_epi64((A), (imm)), \ + (__v2di)_mm_setzero_di()); }) #define _mm512_insertf32x8(A, B, imm) __extension__ ({ \ (__m512)__builtin_ia32_insertf32x8_mask((__v16sf)(__m512)(A), \ diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index d08d7574f7..1cadb865e0 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -3440,35 +3440,42 @@ _mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, (__mmask16)(U)); }) /* Vector Extract */ -#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \ - (__v4df)_mm256_setzero_si256(), \ - (__mmask8)-1); }) +#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ + (__m256d)__builtin_shufflevector((__v8df)(__m512d)(A), \ + (__v8df)_mm512_undefined_pd(), \ + ((I) & 1) ? 4 : 0, \ + ((I) & 1) ? 5 : 1, \ + ((I) & 1) ? 6 : 2, \ + ((I) & 1) ? 7 : 3); }) #define _mm512_mask_extractf64x4_pd(W, U, A, imm) __extension__ ({\ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__v4df)(__m256d)(W), \ - (__mmask8)(U)); }) + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ + (__v4df)(W)); }) #define _mm512_maskz_extractf64x4_pd(U, A, imm) __extension__ ({\ - (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \ - (__v4df)_mm256_setzero_pd(), \ - (__mmask8)(U)); }) + (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ + (__v4df)_mm512_extractf64x4_pd((A), (imm)), \ + (__v4df)_mm256_setzero_pd()); }) -#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) +#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ + (__m128)__builtin_shufflevector((__v16sf)(__m512)(A), \ + (__v16sf)_mm512_undefined_ps(), \ + 0 + ((I) & 0x3) * 4, \ + 1 + ((I) & 0x3) * 4, \ + 2 + ((I) & 0x3) * 4, \ + 3 + ((I) & 0x3) * 4); }) #define _mm512_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({\ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ + (__v4sf)(W)); }) #define _mm512_maskz_extractf32x4_ps(U, A, imm) __extension__ ({\ - (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm512_extractf32x4_ps((A), (imm)), \ + (__v4sf)_mm_setzero_ps()); }) + /* Vector Blend */ static __inline __m512d __DEFAULT_FN_ATTRS @@ -7895,35 +7902,41 @@ _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_undefined_si128(), \ - (__mmask8)-1); }) +#define _mm512_extracti32x4_epi32(A, imm) __extension__ ({ \ + (__m128i)__builtin_shufflevector((__v16si)(__m512i)(A), \ + (__v16si)_mm512_undefined_epi32(), \ + 0 + ((imm) & 0x3) * 4, \ + 1 + ((imm) & 0x3) * 4, \ + 2 + ((imm) & 0x3) * 4, \ + 3 + ((imm) & 0x3) * 4); }) #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \ + (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ + (__v4si)__W); }) #define _mm512_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, \ + (__v4si)_mm512_extracti32x4_epi32((A), (imm)), \ + (__v4si)_mm_setzero_si128()); }) -#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_undefined_si256(), \ - (__mmask8)-1); }) +#define _mm512_extracti64x4_epi64(A, imm) __extension__ ({ \ + (__m256i)__builtin_shufflevector((__v8di)(__m512i)(A), \ + (__v8di)_mm512_undefined_epi32(), \ + ((imm) & 1) ? 4 : 0, \ + ((imm) & 1) ? 5 : 1, \ + ((imm) & 1) ? 6 : 2, \ + ((imm) & 1) ? 7 : 3); }) #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)(__m256i)(W), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ + (__v4di)__W); }) #define _mm512_maskz_extracti64x4_epi64(U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) + (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm512_extracti64x4_epi64((A), (imm)), \ + (__v4di)_mm256_setzero_si256()); }) #define _mm512_insertf64x4(A, B, imm) __extension__ ({ \ (__m512d)__builtin_ia32_insertf64x4_mask((__v8df)(__m512d)(A), \ diff --git a/lib/Headers/avx512vldqintrin.h b/lib/Headers/avx512vldqintrin.h index ffe59595e8..0f617c1a49 100644 --- a/lib/Headers/avx512vldqintrin.h +++ b/lib/Headers/avx512vldqintrin.h @@ -1096,40 +1096,36 @@ _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) } #define _mm256_extractf64x2_pd(A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)-1); }) + (__m128d)__builtin_shufflevector((__v4df)(__m256d)(A), \ + (__v4df)_mm256_undefined_pd(), \ + ((imm) & 1) ? 2 : 0, \ + ((imm) & 1) ? 3 : 1); }) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)(__m128d)(W), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ + (__v2df)(W)); }) #define _mm256_maskz_extractf64x2_pd(U, A, imm) __extension__ ({ \ - (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ - (int)(imm), \ - (__v2df)_mm_setzero_pd(), \ - (__mmask8)(U)); }) + (__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ + (__v2df)_mm256_extractf64x2_pd((A), (imm)), \ + (__v2df)_mm_setzero_pd()); }) #define _mm256_extracti64x2_epi64(A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)-1); }) + (__m128i)__builtin_shufflevector((__v4di)(__m256i)(A), \ + (__v4di)_mm256_undefined_si256(), \ + ((imm) & 1) ? 2 : 0, \ + ((imm) & 1) ? 3 : 1); }) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ + (__v2di)(W)); }) #define _mm256_maskz_extracti64x2_epi64(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ - (int)(imm), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ + (__v2di)_mm256_extracti64x2_epi64((A), (imm)), \ + (__v2di)_mm_setzero_di()); }) #define _mm256_insertf64x2(A, B, imm) __extension__ ({ \ (__m256d)__builtin_ia32_insertf64x2_256_mask((__v4df)(__m256d)(A), \ diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index e915087943..42e5d918cb 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -8273,40 +8273,40 @@ _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) } #define _mm256_extractf32x4_ps(A, imm) __extension__ ({ \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)-1); }) + (__m128)__builtin_shufflevector((__v8sf)(__m256)(A), \ + (__v8sf)_mm256_undefined_ps(), \ + ((imm) & 1) ? 4 : 0, \ + ((imm) & 1) ? 5 : 1, \ + ((imm) & 1) ? 6 : 2, \ + ((imm) & 1) ? 7 : 3); }) #define _mm256_mask_extractf32x4_ps(W, U, A, imm) __extension__ ({ \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)(__m128)(W), \ - (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ + (__v4sf)(W)); }) #define _mm256_maskz_extractf32x4_ps(U, A, imm) __extension__ ({ \ - (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ - (int)(imm), \ - (__v4sf)_mm_setzero_ps(), \ - (__mmask8)(U)); }) + (__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4sf)_mm256_extractf32x4_ps((A), (imm)), \ + (__v4sf)_mm_setzero_ps()); }) #define _mm256_extracti32x4_epi32(A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)-1); }) + (__m128i)__builtin_shufflevector((__v8si)(__m256)(A), \ + (__v8si)_mm256_undefined_si256(), \ + ((imm) & 1) ? 4 : 0, \ + ((imm) & 1) ? 5 : 1, \ + ((imm) & 1) ? 6 : 2, \ + ((imm) & 1) ? 7 : 3); }) #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)(__m128i)(W), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ + (__v4si)(W)); }) #define _mm256_maskz_extracti32x4_epi32(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ - (int)(imm), \ - (__v4si)_mm_setzero_si128(), \ - (__mmask8)(U)); }) + (__m128i)__builtin_ia32_selectps_128((__mmask8)(U), \ + (__v4si)_mm256_extracti32x4_epi32((A), (imm)), \ + (__v4si)_mm_setzero_si128()); }) #define _mm256_insertf32x4(A, B, imm) __extension__ ({ \ (__m256)__builtin_ia32_insertf32x4_256_mask((__v8sf)(__m256)(A), \ diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index c0d85c9ab0..c73bd4c6ed 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -1971,21 +1971,7 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { switch (BuiltinID) { default: return false; - case X86::BI__builtin_ia32_extractf64x4_mask: - case X86::BI__builtin_ia32_extracti64x4_mask: - case X86::BI__builtin_ia32_extractf32x8_mask: - case X86::BI__builtin_ia32_extracti32x8_mask: - case X86::BI__builtin_ia32_extractf64x2_256_mask: - case X86::BI__builtin_ia32_extracti64x2_256_mask: - case X86::BI__builtin_ia32_extractf32x4_256_mask: - case X86::BI__builtin_ia32_extracti32x4_256_mask: - i = 1; l = 0; u = 1; - break; case X86::BI_mm_prefetch: - case X86::BI__builtin_ia32_extractf32x4_mask: - case X86::BI__builtin_ia32_extracti32x4_mask: - case X86::BI__builtin_ia32_extractf64x2_512_mask: - case X86::BI__builtin_ia32_extracti64x2_512_mask: i = 1; l = 0; u = 3; break; case X86::BI__builtin_ia32_insertf32x8_mask: diff --git a/test/CodeGen/avx512dq-builtins.c b/test/CodeGen/avx512dq-builtins.c index 9a2b48cc4a..d6191ecd39 100644 --- a/test/CodeGen/avx512dq-builtins.c +++ b/test/CodeGen/avx512dq-builtins.c @@ -1054,73 +1054,81 @@ __m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { } __m256 test_mm512_extractf32x8_ps(__m512 __A) { // CHECK-LABEL: @test_mm512_extractf32x8_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> return _mm512_extractf32x8_ps(__A, 1); } __m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) { // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); } __m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) { // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}} return _mm512_maskz_extractf32x8_ps(__U, __A, 1); } __m128d test_mm512_extractf64x2_pd(__m512d __A) { // CHECK-LABEL: @test_mm512_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <8 x double> %0, <8 x double> undef, <2 x i32> return _mm512_extractf64x2_pd(__A, 3); } __m128d test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); } __m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) { // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm512_maskz_extractf64x2_pd(__U, __A, 3); } __m256i test_mm512_extracti32x8_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti32x8_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> return _mm512_extracti32x8_epi32(__A, 1); } __m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <8 x i32> + // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); } __m128i test_mm512_extracti64x2_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> return _mm512_extracti64x2_epi64(__A, 3); } __m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); } diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 2858f98cf2..55b44b8698 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -1236,38 +1236,42 @@ __mmask8 test_mm512_mask_cmpunord_ps_mask(__mmask8 k, __m512 a, __m512 b) { __m256d test_mm512_extractf64x4_pd(__m512d a) { // CHECK-LABEL: @test_mm512_extractf64x4_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x4.512 + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> return _mm512_extractf64x4_pd(a, 1); } __m256d test_mm512_mask_extractf64x4_pd(__m256d __W,__mmask8 __U,__m512d __A){ - //CHECK-LABEL:@test_mm512_mask_extractf64x4_pd - //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512 - return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); + // CHECK-LABEL:@test_mm512_mask_extractf64x4_pd + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm512_mask_extractf64x4_pd( __W, __U, __A, 1); } __m256d test_mm512_maskz_extractf64x4_pd(__mmask8 __U,__m512d __A){ - //CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd - //CHECL:@llvm.x86.avx512.mask.vextractf64x4.512 - return _mm512_maskz_extractf64x4_pd( __U, __A, 1); + // CHECK-LABEL:@test_mm512_maskz_extractf64x4_pd + // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}} + return _mm512_maskz_extractf64x4_pd( __U, __A, 1); } __m128 test_mm512_extractf32x4_ps(__m512 a) { // CHECK-LABEL: @test_mm512_extractf32x4_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x4.512 + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> return _mm512_extractf32x4_ps(a, 1); } __m128 test_mm512_mask_extractf32x4_ps(__m128 __W, __mmask8 __U,__m512d __A){ - //CHECK-LABEL:@test_mm512_mask_extractf32x4_ps - //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512 - return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); + // CHECK-LABEL:@test_mm512_mask_extractf32x4_ps + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} + return _mm512_mask_extractf32x4_ps( __W, __U, __A, 1); } __m128 test_mm512_maskz_extractf32x4_ps( __mmask8 __U,__m512d __A){ - //CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps - //CHECL: @llvm.x86.avx512.mask.vextractf32x4.512 + // CHECK-LABEL:@test_mm512_maskz_extractf32x4_ps + // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm512_maskz_extractf32x4_ps( __U, __A, 1); } @@ -5097,37 +5101,41 @@ void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) __m128i test_mm512_extracti32x4_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> return _mm512_extracti32x4_epi32(__A, 3); } __m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 3); } __m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm512_maskz_extracti32x4_epi32(__U, __A, 3); } __m256i test_mm512_extracti64x4_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_extracti64x4_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> return _mm512_extracti64x4_epi64(__A, 1); } __m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_extracti64x4_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 1); } __m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm512_maskz_extracti64x4_epi64(__U, __A, 1); } diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index b6cb5fbd48..147522be19 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -6555,37 +6555,41 @@ void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A __m128 test_mm256_extractf32x4_ps(__m256 __A) { // CHECK-LABEL: @test_mm256_extractf32x4_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> return _mm256_extractf32x4_ps(__A, 1); } __m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); } __m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps - // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> undef, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_maskz_extractf32x4_ps(__U, __A, 1); } __m128i test_mm256_extracti32x4_epi32(__m256i __A) { // CHECK-LABEL: @test_mm256_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> return _mm256_extracti32x4_epi32(__A, 1); } __m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); } __m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32 - // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <4 x i32> + // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}} return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); } diff --git a/test/CodeGen/avx512vldq-builtins.c b/test/CodeGen/avx512vldq-builtins.c index 1ac56ef449..1b953aec22 100644 --- a/test/CodeGen/avx512vldq-builtins.c +++ b/test/CodeGen/avx512vldq-builtins.c @@ -992,37 +992,41 @@ __m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { __m128d test_mm256_extractf64x2_pd(__m256d __A) { // CHECK-LABEL: @test_mm256_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> return _mm256_extractf64x2_pd(__A, 1); } __m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_mask_extractf64x2_pd(__W, __U, __A, 1); } __m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) { // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd - // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} return _mm256_maskz_extractf64x2_pd(__U, __A, 1); } __m128i test_mm256_extracti64x2_epi64(__m256i __A) { // CHECK-LABEL: @test_mm256_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> return _mm256_extracti64x2_epi64(__A, 1); } __m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 1); } __m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64 - // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> undef, <2 x i32> + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm256_maskz_extracti64x2_epi64(__U, __A, 1); } -- 2.40.0