From 775c7affdce3b4ab05558f0cfccebf363da437b4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Nov 2016 07:16:59 +0000 Subject: [PATCH] [AVX-512] Convert the rest of the masked shift by immediate and by single element builtins over to the newly added unmasked builtins and a select. This should also fix PR30691 since the new builtins are handled like the legacy builtins in the backend. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@286714 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 44 ++-- lib/Headers/avx512bwintrin.h | 178 +++++++------- lib/Headers/avx512fintrin.h | 347 ++++++++++++++-------------- lib/Headers/avx512vlintrin.h | 118 +++++----- lib/Sema/SemaChecking.cpp | 11 - test/CodeGen/avx512bw-builtins.c | 48 ++-- test/CodeGen/avx512f-builtins.c | 95 +++++--- test/CodeGen/avx512vl-builtins.c | 32 ++- 8 files changed, 447 insertions(+), 426 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index 485363ae3b..07b20b3cf5 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -1359,26 +1359,26 @@ TARGET_BUILTIN(__builtin_ia32_prorvd256_mask, "V8iV8iV8iV8iUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_prorvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_prorvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_psllv32hi_mask, "V32sV32sV32sV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllw512_mask, "V32sV32sV8sV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllwi512_mask, "V32sV32sIiV32sUi","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si","","avx512bw") TARGET_BUILTIN(__builtin_ia32_psllv16hi_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psllv8hi_mask, "V8sV8sV8sV8sUc","","avx512bw,avx512vl") 
-TARGET_BUILTIN(__builtin_ia32_pslldi512_mask, "V16iV16iIiV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psllqi512_mask, "V8LLiV8LLiIiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrlv32hi_mask, "V32sV32sV32sV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_psrlv16hi_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psrlv8hi_mask, "V8sV8sV8sV8sUc","","avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrldi512_mask, "V16iV16iIiV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlqi512_mask, "V8LLiV8LLiIiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrav32hi_mask, "V32sV32sV32sV32sUi","","avx512bw") TARGET_BUILTIN(__builtin_ia32_psrav16hi_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psrav8hi_mask, "V8sV8sV8sV8sUc","","avx512bw,avx512vl") TARGET_BUILTIN(__builtin_ia32_psravq128_mask, "V2LLiV2LLiV2LLiV2LLiUc","","avx512vl") TARGET_BUILTIN(__builtin_ia32_psravq256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraw512_mask, "V32sV32sV8sV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrawi512_mask, "V32sV32sIiV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlw512_mask, "V32sV32sV8sV32sUi","","avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlwi512_mask, "V32sV32sIiV32sUi","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si","","avx512bw") TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc","","avx512f") 
TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs","","avx512f") @@ -1539,22 +1539,22 @@ TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi","","avx512f") TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psradi512_mask, "V16iV16iIiV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psraqi512_mask, "V8LLiV8LLiIiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq128_mask, "V2LLiV2LLiV2LLiV2LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraq256_mask, "V4LLiV4LLiV2LLiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi128_mask, "V2LLiV2LLiIiV2LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi256_mask, "V4LLiV4LLiIiV4LLiUc","","avx512vl") -TARGET_BUILTIN(__builtin_ia32_pslld512_mask, "V16iV16iV4iV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psllq512_mask, "V8LLiV8LLiV2LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi","","avx512f") TARGET_BUILTIN(__builtin_ia32_psllv16si_mask, "V16iV16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_psllv8di_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psrad512_mask, 
"V16iV16iV4iV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq512_mask, "V8LLiV8LLiV2LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrav16si_mask, "V16iV16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrav8di_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psrld512_mask, "V16iV16iV4iV16iUs","","avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlq512_mask, "V8LLiV8LLiV2LLiV8LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i","","avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrlv16si_mask, "V16iV16iV16iV16iUs","","avx512f") TARGET_BUILTIN(__builtin_ia32_psrlv8di_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f") TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs","","avx512f") diff --git a/lib/Headers/avx512bwintrin.h b/lib/Headers/avx512bwintrin.h index fe1d4122d5..e4dfe212b7 100644 --- a/lib/Headers/avx512bwintrin.h +++ b/lib/Headers/avx512bwintrin.h @@ -1718,49 +1718,48 @@ _mm512_maskz_sllv_epi16 (__mmask32 __U, __m512i __A, __m512i __B) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sll_epi16 (__m512i __A, __m128i __B) +_mm512_sll_epi16(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sll_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m128i __B) +_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + 
(__v32hi)_mm512_sll_epi16(__A, __B), \ + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sll_epi16 (__mmask32 __U, __m512i __A, __m128i __B) +_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_sll_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); } -#define _mm512_slli_epi16(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_slli_epi16(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); +} -#define _mm512_mask_slli_epi16(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_slli_epi16(__A, __B), \ + (__v32hi)__W); +} -#define _mm512_maskz_slli_epi16(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllwi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_slli_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); +} #define _mm512_bslli_epi128(a, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector( \ @@ -1892,95 +1891,92 @@ _mm512_maskz_srav_epi16 (__mmask32 __U, __m512i __A, __m512i __B) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sra_epi16 (__m512i __A, __m128i __B) 
+_mm512_sra_epi16(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sra_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m128i __B) +_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_sra_epi16(__A, __B), \ + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sra_epi16 (__mmask32 __U, __m512i __A, __m128i __B) +_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_sra_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); } -#define _mm512_srai_epi16(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)-1); }) - -#define _mm512_mask_srai_epi16(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srai_epi16(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); +} -#define _mm512_maskz_srai_epi16(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrawi512_mask((__v32hi)(__m512i)(A), (int)(B), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) +{ + return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srai_epi16(__A, __B), \ + (__v32hi)__W); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srai_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); +} static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srl_epi16 (__m512i __A, __m128i __B) +_mm512_srl_epi16(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) -1); + return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srl_epi16 (__m512i __W, __mmask32 __U, __m512i __A, - __m128i __B) +_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) __W, - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srl_epi16(__A, __B), \ + (__v32hi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srl_epi16 (__mmask32 __U, __m512i __A, __m128i __B) +_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlw512_mask ((__v32hi) __A, - (__v8hi) __B, - (__v32hi) - _mm512_setzero_hi (), - (__mmask32) __U); + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srl_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); } -#define _mm512_srli_epi16(A, imm) __extension__ ({ \ - (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srli_epi16(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); +} -#define _mm512_mask_srli_epi16(W, U, A, imm) 
__extension__ ({ \ - (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \ - (__v32hi)(__m512i)(W), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srli_epi16(__A, __B), \ + (__v32hi)__W); +} -#define _mm512_maskz_srli_epi16(U, A, imm) __extension__ ({ \ - (__m512i)__builtin_ia32_psrlwi512_mask((__v32hi)(__m512i)(A), (int)(imm), \ - (__v32hi)_mm512_setzero_hi(), \ - (__mmask32)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, \ + (__v32hi)_mm512_srli_epi16(__A, __B), \ + (__v32hi)_mm512_setzero_hi()); +} #define _mm512_bsrli_epi128(a, imm) __extension__ ({ \ (__m512i)__builtin_shufflevector( \ diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 12fbf3a5ed..d7f8cf42bc 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -5457,67 +5457,91 @@ _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) (__v8di)_mm512_setzero_si512(), \ (__mmask8)(U)); }) -#define _mm512_slli_epi32(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) - -#define _mm512_mask_slli_epi32(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) - -#define _mm512_maskz_slli_epi32(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_pslldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_slli_epi32(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); +} -#define 
_mm512_slli_epi64(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_slli_epi32(__A, __B), \ + (__v16si)__W); +} -#define _mm512_mask_slli_epi64(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_slli_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); +} -#define _mm512_maskz_slli_epi64(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psllqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_slli_epi64(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_slli_epi64(__A, __B), \ + (__v8di)__W); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_slli_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); +} -#define _mm512_srli_epi32(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srli_epi32(__m512i __A, int __B) +{ + return 
(__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); +} -#define _mm512_mask_srli_epi32(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srli_epi32(__A, __B), \ + (__v16si)__W); +} -#define _mm512_maskz_srli_epi32(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrldi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srli_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); +} -#define _mm512_srli_epi64(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srli_epi64(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); +} -#define _mm512_mask_srli_epi64(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srli_epi64(__A, __B), \ + (__v8di)__W); +} -#define _mm512_maskz_srli_epi64(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psrlqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srli_epi64(__mmask8 __U, __m512i 
__A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srli_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); +} static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) @@ -5992,61 +6016,47 @@ _mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sll_epi32 (__m512i __A, __m128i __B) +_mm512_sll_epi32(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_sll_epi32(__A, __B), \ + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_sll_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sll_epi64 (__m512i __A, __m128i __B) +_mm512_sll_epi64(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS 
-_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) +_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_sll_epi64(__A, __B), \ + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) +_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_sll_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -6108,61 +6118,47 @@ _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sra_epi32 (__m512i __A, __m128i __B) +_mm512_sra_epi32(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_sra_epi32(__A, __B), \ + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrad512_mask 
((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_sra_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_sra_epi64 (__m512i __A, __m128i __B) +_mm512_sra_epi64(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) +_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_sra_epi64(__A, __B), \ + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) +_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_sra_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -6224,61 +6220,47 @@ _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srl_epi32 (__m512i __A, __m128i __B) +_mm512_srl_epi32(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS 
-_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) +_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srl_epi32(__A, __B), \ + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) +_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, - (__v4si) __B, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srl_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_srl_epi64 (__m512i __A, __m128i __B) +_mm512_srl_epi64(__m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) +_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srl_epi64(__A, __B), \ + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) +_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { - return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, - (__v2di) __B, - (__v8di) - _mm512_setzero_si512 (), - (__mmask8) __U); + return 
(__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srl_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -7110,35 +7092,48 @@ _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION); }) -#define _mm512_srai_epi32(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srai_epi32(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); +} -#define _mm512_mask_srai_epi32(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)(__m512i)(W), \ - (__mmask16)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srai_epi32(__A, __B), \ + (__v16si)__W); +} -#define _mm512_maskz_srai_epi32(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psradi512_mask((__v16si)(__m512i)(A), (int)(B), \ - (__v16si)_mm512_setzero_si512(), \ - (__mmask16)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ + (__v16si)_mm512_srai_epi32(__A, __B), \ + (__v16si)_mm512_setzero_si512()); +} -#define _mm512_srai_epi64(A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)-1); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_srai_epi64(__m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); +} -#define _mm512_mask_srai_epi64(W, U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), 
(int)(B), \ - (__v8di)(__m512i)(W), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srai_epi64(__A, __B), \ + (__v8di)__W); +} -#define _mm512_maskz_srai_epi64(U, A, B) __extension__ ({ \ - (__m512i)__builtin_ia32_psraqi512_mask((__v8di)(__m512i)(A), (int)(B), \ - (__v8di)_mm512_setzero_si512(), \ - (__mmask8)(U)); }) +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) +{ + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ + (__v8di)_mm512_srai_epi64(__A, __B), \ + (__v8di)_mm512_setzero_si512()); +} #define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \ (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \ diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 1bff7a7e2a..734e651f16 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -6845,94 +6845,92 @@ _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sra_epi64 (__m128i __A, __m128i __B) +_mm_sra_epi64(__m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_di (), - (__mmask8) -1); + return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_mask_sra_epi64 (__m128i __W, __mmask8 __U, __m128i __A, - __m128i __B) +_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __W, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ + (__v2di)_mm_sra_epi64(__A, __B), \ + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_maskz_sra_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 
+_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { - return (__m128i) __builtin_ia32_psraq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) - _mm_setzero_di (), - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ + (__v2di)_mm_sra_epi64(__A, __B), \ + (__v2di)_mm_setzero_di()); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_sra_epi64 (__m256i __A, __m128i __B) +_mm256_sra_epi64(__m256i __A, __m128i __B) { - return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, - (__v2di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) -1); + return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_mask_sra_epi64 (__m256i __W, __mmask8 __U, __m256i __A, - __m128i __B) +_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { - return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, - (__v2di) __B, - (__v4di) __W, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm256_sra_epi64(__A, __B), \ + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS -_mm256_maskz_sra_epi64 (__mmask8 __U, __m256i __A, __m128i __B) +_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { - return (__m256i) __builtin_ia32_psraq256_mask ((__v4di) __A, - (__v2di) __B, - (__v4di) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm256_sra_epi64(__A, __B), \ + (__v4di)_mm256_setzero_si256()); } -#define _mm_srai_epi64(A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ - (__v2di)_mm_setzero_di(), \ - (__mmask8)-1); }) +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_srai_epi64(__m128i __A, int __imm) +{ + return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); +} -#define _mm_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ - 
(__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ - (__v2di)(__m128i)(W), \ - (__mmask8)(U)); }) +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) +{ + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ + (__v2di)_mm_srai_epi64(__A, __imm), \ + (__v2di)__W); +} -#define _mm_maskz_srai_epi64(U, A, imm) __extension__ ({ \ - (__m128i)__builtin_ia32_psraqi128_mask((__v2di)(__m128i)(A), (int)(imm), \ - (__v2di)_mm_setzero_si128(), \ - (__mmask8)(U)); }) +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) +{ + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ + (__v2di)_mm_srai_epi64(__A, __imm), \ + (__v2di)_mm_setzero_di()); +} -#define _mm256_srai_epi64(A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)-1); }) +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_srai_epi64(__m256i __A, int __imm) +{ + return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); +} -#define _mm256_mask_srai_epi64(W, U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ - (__v4di)(__m256i)(W), \ - (__mmask8)(U)); }) +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) +{ + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm256_srai_epi64(__A, __imm), \ + (__v4di)__W); +} -#define _mm256_maskz_srai_epi64(U, A, imm) __extension__ ({ \ - (__m256i)__builtin_ia32_psraqi256_mask((__v4di)(__m256i)(A), (int)(imm), \ - (__v4di)_mm256_setzero_si256(), \ - (__mmask8)(U)); }) +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) +{ + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ + (__v4di)_mm256_srai_epi64(__A, __imm), \ + 
(__v4di)_mm256_setzero_si256()); +} #define _mm_ternarylogic_epi32(A, B, C, imm) __extension__ ({ \ (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \ diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp index a283edd7de..6956b0633b 100644 --- a/lib/Sema/SemaChecking.cpp +++ b/lib/Sema/SemaChecking.cpp @@ -2107,17 +2107,6 @@ bool Sema::CheckX86BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case X86::BI__builtin_ia32_prord256_mask: case X86::BI__builtin_ia32_prorq128_mask: case X86::BI__builtin_ia32_prorq256_mask: - case X86::BI__builtin_ia32_psllwi512_mask: - case X86::BI__builtin_ia32_psrldi512_mask: - case X86::BI__builtin_ia32_psrlqi512_mask: - case X86::BI__builtin_ia32_psrawi512_mask: - case X86::BI__builtin_ia32_psrlwi512_mask: - case X86::BI__builtin_ia32_psradi512_mask: - case X86::BI__builtin_ia32_psraqi128_mask: - case X86::BI__builtin_ia32_psraqi256_mask: - case X86::BI__builtin_ia32_psraqi512_mask: - case X86::BI__builtin_ia32_pslldi512_mask: - case X86::BI__builtin_ia32_psllqi512_mask: case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclassps128_mask: diff --git a/test/CodeGen/avx512bw-builtins.c b/test/CodeGen/avx512bw-builtins.c index 3ec6c3010f..2df9829e31 100644 --- a/test/CodeGen/avx512bw-builtins.c +++ b/test/CodeGen/avx512bw-builtins.c @@ -1225,37 +1225,41 @@ __m512i test_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) { __m512i test_mm512_sll_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sll_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.w.512 + // CHECK: @llvm.x86.avx512.psll.w.512 return _mm512_sll_epi16(__A, __B); } __m512i test_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sll_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.w.512 + // CHECK: @llvm.x86.avx512.psll.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> 
%{{.*}} return _mm512_mask_sll_epi16(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_sll_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.w.512 + // CHECK: @llvm.x86.avx512.psll.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_sll_epi16(__U, __A, __B); } __m512i test_mm512_slli_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_slli_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.wi.512 + // CHECK: @llvm.x86.avx512.pslli.w.512 return _mm512_slli_epi16(__A, 5); } __m512i test_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_slli_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.wi.512 + // CHECK: @llvm.x86.avx512.pslli.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_slli_epi16(__W, __U, __A, 5); } __m512i test_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_slli_epi16 - // CHECK: @llvm.x86.avx512.mask.psll.wi.512 + // CHECK: @llvm.x86.avx512.pslli.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_slli_epi16(__U, __A, 5); } @@ -1303,73 +1307,81 @@ __m512i test_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) { __m512i test_mm512_sra_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sra_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.w.512 + // CHECK: @llvm.x86.avx512.psra.w.512 return _mm512_sra_epi16(__A, __B); } __m512i test_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sra_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.w.512 + // CHECK: @llvm.x86.avx512.psra.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_sra_epi16(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i 
__B) { // CHECK-LABEL: @test_mm512_maskz_sra_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.w.512 + // CHECK: @llvm.x86.avx512.psra.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_sra_epi16(__U, __A, __B); } __m512i test_mm512_srai_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_srai_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.wi.512 + // CHECK: @llvm.x86.avx512.psrai.w.512 return _mm512_srai_epi16(__A, 5); } __m512i test_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srai_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.wi.512 + // CHECK: @llvm.x86.avx512.psrai.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srai_epi16(__W, __U, __A, 5); } __m512i test_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srai_epi16 - // CHECK: @llvm.x86.avx512.mask.psra.wi.512 + // CHECK: @llvm.x86.avx512.psrai.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srai_epi16(__U, __A, 5); } __m512i test_mm512_srl_epi16(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_srl_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.w.512 + // CHECK: @llvm.x86.avx512.psrl.w.512 return _mm512_srl_epi16(__A, __B); } __m512i test_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_srl_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.w.512 + // CHECK: @llvm.x86.avx512.psrl.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srl_epi16(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_srl_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.w.512 + // CHECK: @llvm.x86.avx512.psrl.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return 
_mm512_maskz_srl_epi16(__U, __A, __B); } __m512i test_mm512_srli_epi16(__m512i __A) { // CHECK-LABEL: @test_mm512_srli_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.wi.512 + // CHECK: @llvm.x86.avx512.psrli.w.512 return _mm512_srli_epi16(__A, 5); } __m512i test_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srli_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.wi.512 + // CHECK: @llvm.x86.avx512.psrli.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_srli_epi16(__W, __U, __A, 5); } __m512i test_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srli_epi16 - // CHECK: @llvm.x86.avx512.mask.psrl.wi.512 + // CHECK: @llvm.x86.avx512.psrli.w.512 + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_srli_epi16(__U, __A, 5); } diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 2b2d0ac2dd..0801177206 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -2808,73 +2808,80 @@ __m512i test_mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B) { __m512i test_mm512_slli_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_slli_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.di.512 + // CHECK: @llvm.x86.avx512.pslli.d.512 return _mm512_slli_epi32(__A, 5); } __m512i test_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_slli_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.di.512 + // CHECK: @llvm.x86.avx512.pslli.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_slli_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_slli_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.di.512 + // CHECK: @llvm.x86.avx512.pslli.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, 
<16 x i32> %{{.*}} return _mm512_maskz_slli_epi32(__U, __A, 5); } __m512i test_mm512_slli_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_slli_epi64 - // CHECK: @llvm.x86.avx512.mask.psll.qi.512 + // CHECK: @llvm.x86.avx512.pslli.q.512 return _mm512_slli_epi64(__A, 5); } __m512i test_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_slli_epi64 - // CHECK: @llvm.x86.avx512.mask.psll.qi.512 + // CHECK: @llvm.x86.avx512.pslli.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_slli_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_slli_epi64 - // CHECK: @llvm.x86.avx512.mask.psll.qi.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_slli_epi64(__U, __A, 5); } __m512i test_mm512_srli_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_srli_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.di.512 + // CHECK: @llvm.x86.avx512.psrli.d.512 return _mm512_srli_epi32(__A, 5); } __m512i test_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srli_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.di.512 + // CHECK: @llvm.x86.avx512.psrli.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srli_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srli_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.di.512 + // CHECK: @llvm.x86.avx512.psrli.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srli_epi32(__U, __A, 5); } __m512i test_mm512_srli_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_srli_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.qi.512 + // CHECK: @llvm.x86.avx512.psrli.q.512 return _mm512_srli_epi64(__A, 5); } __m512i test_mm512_mask_srli_epi64(__m512i 
__W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srli_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.qi.512 + // CHECK: @llvm.x86.avx512.psrli.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srli_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srli_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.qi.512 + // CHECK: @llvm.x86.avx512.psrli.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srli_epi64(__U, __A, 5); } @@ -4138,73 +4145,81 @@ __m128 test_mm_maskz_scalef_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ __m512i test_mm512_srai_epi32(__m512i __A) { // CHECK-LABEL: @test_mm512_srai_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.di.512 + // CHECK: @llvm.x86.avx512.psrai.d.512 return _mm512_srai_epi32(__A, 5); } __m512i test_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srai_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.di.512 + // CHECK: @llvm.x86.avx512.psrai.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srai_epi32(__W, __U, __A, 5); } __m512i test_mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srai_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.di.512 + // CHECK: @llvm.x86.avx512.psrai.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srai_epi32(__U, __A, 5); } __m512i test_mm512_srai_epi64(__m512i __A) { // CHECK-LABEL: @test_mm512_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.512 + // CHECK: @llvm.x86.avx512.psrai.q.512 return _mm512_srai_epi64(__A, 5); } __m512i test_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.512 + // CHECK: @llvm.x86.avx512.psrai.q.512 + // 
CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srai_epi64(__W, __U, __A, 5); } __m512i test_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_maskz_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.512 + // CHECK: @llvm.x86.avx512.psrai.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srai_epi64(__U, __A, 5); } __m512i test_mm512_sll_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sll_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.d + // CHECK: @llvm.x86.avx512.psll.d.512 return _mm512_sll_epi32(__A, __B); } __m512i test_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sll_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.d + // CHECK: @llvm.x86.avx512.psll.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sll_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_sll_epi32 - // CHECK: @llvm.x86.avx512.mask.psll.d + // CHECK: @llvm.x86.avx512.psll.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sll_epi32(__U, __A, __B); } __m512i test_mm512_sll_epi64(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sll_epi64 - // CHECK: @llvm.x86.avx512.mask.psll.q + // CHECK: @llvm.x86.avx512.psll.q.512 return _mm512_sll_epi64(__A, __B); } __m512i test_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sll_epi64 - // CHECK: @llvm.x86.avx512.mask.psll.q + // CHECK: @llvm.x86.avx512.psll.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sll_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_sll_epi64 - // 
CHECK: @llvm.x86.avx512.mask.psll.q + // CHECK: @llvm.x86.avx512.psll.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_sll_epi64(__U, __A, __B); } @@ -4246,37 +4261,41 @@ __m512i test_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { __m512i test_mm512_sra_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sra_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.d + // CHECK: @llvm.x86.avx512.psra.d.512 return _mm512_sra_epi32(__A, __B); } __m512i test_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sra_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.d + // CHECK: @llvm.x86.avx512.psra.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_sra_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_sra_epi32 - // CHECK: @llvm.x86.avx512.mask.psra.d + // CHECK: @llvm.x86.avx512.psra.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_sra_epi32(__U, __A, __B); } __m512i test_mm512_sra_epi64(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q + // CHECK: @llvm.x86.avx512.psra.q.512 return _mm512_sra_epi64(__A, __B); } __m512i test_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q + // CHECK: @llvm.x86.avx512.psra.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_sra_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q + // CHECK: @llvm.x86.avx512.psra.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x 
i64> %{{.*}} return _mm512_maskz_sra_epi64(__U, __A, __B); } @@ -4318,37 +4337,41 @@ __m512i test_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { __m512i test_mm512_srl_epi32(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_srl_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.d + // CHECK: @llvm.x86.avx512.psrl.d.512 return _mm512_srl_epi32(__A, __B); } __m512i test_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_srl_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.d + // CHECK: @llvm.x86.avx512.psrl.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_srl_epi32(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_srl_epi32 - // CHECK: @llvm.x86.avx512.mask.psrl.d + // CHECK: @llvm.x86.avx512.psrl.d.512 + // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_srl_epi32(__U, __A, __B); } __m512i test_mm512_srl_epi64(__m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_srl_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.q + // CHECK: @llvm.x86.avx512.psrl.q.512 return _mm512_srl_epi64(__A, __B); } __m512i test_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_mask_srl_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.q + // CHECK: @llvm.x86.avx512.psrl.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_srl_epi64(__W, __U, __A, __B); } __m512i test_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { // CHECK-LABEL: @test_mm512_maskz_srl_epi64 - // CHECK: @llvm.x86.avx512.mask.psrl.q + // CHECK: @llvm.x86.avx512.psrl.q.512 + // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_srl_epi64(__U, __A, __B); } diff --git a/test/CodeGen/avx512vl-builtins.c 
b/test/CodeGen/avx512vl-builtins.c index e6b288f7c5..256311c14d 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -5358,73 +5358,81 @@ __m256i test_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A) { __m128i test_mm_sra_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.128 + // CHECK: @llvm.x86.avx512.psra.q.128 return _mm_sra_epi64(__A, __B); } __m128i test_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.128 + // CHECK: @llvm.x86.avx512.psra.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_sra_epi64(__W, __U, __A, __B); } __m128i test_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: @test_mm_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.128 + // CHECK: @llvm.x86.avx512.psra.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_sra_epi64(__U, __A, __B); } __m256i test_mm256_sra_epi64(__m256i __A, __m128i __B) { // CHECK-LABEL: @test_mm256_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.256 + // CHECK: @llvm.x86.avx512.psra.q.256 return _mm256_sra_epi64(__A, __B); } __m256i test_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: @test_mm256_mask_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.256 + // CHECK: @llvm.x86.avx512.psra.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_sra_epi64(__W, __U, __A, __B); } __m256i test_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { // CHECK-LABEL: @test_mm256_maskz_sra_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.q.256 + // CHECK: @llvm.x86.avx512.psra.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_sra_epi64(__U, __A, __B); } __m128i 
test_mm_srai_epi64(__m128i __A) { // CHECK-LABEL: @test_mm_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.128 + // CHECK: @llvm.x86.avx512.psrai.q.128 return _mm_srai_epi64(__A, 5); } __m128i test_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_mask_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.128 + // CHECK: @llvm.x86.avx512.psrai.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_srai_epi64(__W, __U, __A, 5); } __m128i test_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: @test_mm_maskz_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.128 + // CHECK: @llvm.x86.avx512.psrai.q.128 + // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_srai_epi64(__U, __A, 5); } __m256i test_mm256_srai_epi64(__m256i __A) { // CHECK-LABEL: @test_mm256_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.256 + // CHECK: @llvm.x86.avx512.psrai.q.256 return _mm256_srai_epi64(__A, 5); } __m256i test_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_mask_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.256 + // CHECK: @llvm.x86.avx512.psrai.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_srai_epi64(__W, __U, __A, 5); } __m256i test_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: @test_mm256_maskz_srai_epi64 - // CHECK: @llvm.x86.avx512.mask.psra.qi.256 + // CHECK: @llvm.x86.avx512.psrai.q.256 + // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_srai_epi64(__U, __A, 5); } -- 2.40.0