From a2ccbb6d713a567bc8c2feb5c4b151c7bbff7b5d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 1 Feb 2015 07:35:35 +0000 Subject: [PATCH] [X86] Change rounding parameter of all the AVX512 builtins to an ICE. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@227712 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/BuiltinsX86.def | 74 ++++++------- lib/Headers/avx512fintrin.h | 156 ++++++++++------------------ 2 files changed, 89 insertions(+), 141 deletions(-) diff --git a/include/clang/Basic/BuiltinsX86.def b/include/clang/Basic/BuiltinsX86.def index da7c82c647..8cd7d536c7 100644 --- a/include/clang/Basic/BuiltinsX86.def +++ b/include/clang/Basic/BuiltinsX86.def @@ -738,12 +738,12 @@ BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "") BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "") BUILTIN(__builtin_ia32_vfmsubaddps256, "V8fV8fV8fV8f", "") BUILTIN(__builtin_ia32_vfmsubaddpd256, "V4dV4dV4dV4d", "") -BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUci", "") -BUILTIN(__builtin_ia32_vfmsubpd512_mask, "V8dV8dV8dV8dUci", "") -BUILTIN(__builtin_ia32_vfnmaddpd512_mask, "V8dV8dV8dV8dUci", "") -BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsi", "") -BUILTIN(__builtin_ia32_vfmsubps512_mask, "V16fV16fV16fV16fUsi", "") -BUILTIN(__builtin_ia32_vfnmaddps512_mask, "V16fV16fV16fV16fUsi", "") +BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_vfmsubpd512_mask, "V8dV8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_vfnmaddpd512_mask, "V8dV8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "") +BUILTIN(__builtin_ia32_vfmsubps512_mask, "V16fV16fV16fV16fUsIi", "") +BUILTIN(__builtin_ia32_vfnmaddps512_mask, "V16fV16fV16fV16fUsIi", "") // XOP BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "") @@ -820,29 +820,29 @@ BUILTIN(__builtin_ia32_rdtsc, "ULLi", "") BUILTIN(__builtin_ia32_rdtscp, "ULLiUi*", "") // AVX-512 -BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUciC", "") -BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsiC", "") +BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsIi", "") BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "") BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "") BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "") BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "") -BUILTIN(__builtin_ia32_rsqrt28sd_mask, "V2dV2dV2dV2dUciC", "") -BUILTIN(__builtin_ia32_rsqrt28ss_mask, "V4fV4fV4fV4fUciC", "") -BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUciC", "") -BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsiC", "") +BUILTIN(__builtin_ia32_rsqrt28sd_mask, "V2dV2dV2dV2dUcIi", "") +BUILTIN(__builtin_ia32_rsqrt28ss_mask, "V4fV4fV4fV4fUcIi", "") +BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "") BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "") BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "") BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "") BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "") -BUILTIN(__builtin_ia32_rcp28sd_mask, "V2dV2dV2dV2dUciC", "") -BUILTIN(__builtin_ia32_rcp28ss_mask, "V4fV4fV4fV4fUciC", "") -BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUciC", "") -BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsiC", "") -BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsiC", "") -BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsiC", "") -BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUciC", "") -BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUciC", "") -BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIcUsi", "") +BUILTIN(__builtin_ia32_rcp28sd_mask, "V2dV2dV2dV2dUcIi", "") +BUILTIN(__builtin_ia32_rcp28ss_mask, "V4fV4fV4fV4fUcIi", "") +BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "") +BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "") +BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "") +BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "") +BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "") +BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIcUsIi", "") BUILTIN(__builtin_ia32_pcmpeqb512_mask, "LLiV64cV64cLLi", "") BUILTIN(__builtin_ia32_pcmpeqd512_mask, "sV16iV16is", "") BUILTIN(__builtin_ia32_pcmpeqq512_mask, "cV8LLiV8LLic", "") @@ -867,24 +867,24 @@ BUILTIN(__builtin_ia32_pcmpgtb128_mask, "sV16cV16cs", "") BUILTIN(__builtin_ia32_pcmpgtd128_mask, "cV4iV4ic", "") BUILTIN(__builtin_ia32_pcmpgtq128_mask, "cV2LLiV2LLic", "") BUILTIN(__builtin_ia32_pcmpgtw128_mask, "cV8sV8sc", "") -BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIcUci", "") -BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fiCV16fUsiC", "") -BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8diCV8dUciC", "") -BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsiC", "") -BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUciC", "") -BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsiC", "") -BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUciC", "") -BUILTIN(__builtin_ia32_minps512_mask, "V16fV16fV16fV16fUsiC", "") -BUILTIN(__builtin_ia32_minpd512_mask, "V8dV8dV8dV8dUciC", "") -BUILTIN(__builtin_ia32_maxps512_mask, "V16fV16fV16fV16fUsiC", "") -BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUciC", "") -BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsiC", "") -BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsiC", "") +BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIcUcIi", "") +BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "") +BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "") +BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "") +BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "") +BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "") +BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "") +BUILTIN(__builtin_ia32_minps512_mask, "V16fV16fV16fV16fUsIi", "") +BUILTIN(__builtin_ia32_minpd512_mask, "V8dV8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_maxps512_mask, "V16fV16fV16fV16fUsIi", "") +BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "") +BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "") +BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "") BUILTIN(__builtin_ia32_cvtdq2pd512_mask, "V8dV8iV8dUc", "") BUILTIN(__builtin_ia32_cvtudq2pd512_mask, "V8dV8iV8dUc", "") -BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUciC", "") -BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fiCV16sUs", "") -BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsiC", "") +BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "") +BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "") +BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "") BUILTIN(__builtin_ia32_pabsd512_mask, "V16iV16iV16iUs", "") BUILTIN(__builtin_ia32_pabsq512_mask, "V8LLiV8LLiV8LLiUc", "") BUILTIN(__builtin_ia32_pmaxsd512_mask, "V16iV16iV16iV16iUs", "") diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index d5e04ee1d2..8aa6cecf9d 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -492,20 +492,13 @@ _mm512_abs_epi32(__m512i __A) (__mmask16) -1); } -static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) -_mm512_roundscale_ps(__m512 __A, const int __imm) -{ - return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, - (__v16sf) __A, -1, - _MM_FROUND_CUR_DIRECTION); -} -static __inline __m512d __attribute__ ((__always_inline__, __nodebug__)) -_mm512_roundscale_pd(__m512d __A, const int __imm) -{ - return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, - (__v8df) __A, -1, - _MM_FROUND_CUR_DIRECTION); -} +#define _mm512_roundscale_ps(A, B) __extension__ ({ \ + (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) + +#define _mm512_roundscale_pd(A, B) __extension__ ({ \ + (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \ + -1, _MM_FROUND_CUR_DIRECTION); }) static __inline__ __m512d __attribute__((__always_inline__, __nodebug__)) _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) @@ -713,25 +706,15 @@ _mm512_cvttps_epu32(__m512 __A) _MM_FROUND_CUR_DIRECTION); } -static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundepi32_ps(__m512i __A, const int __R) -{ - return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1, - __R); -} +#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) -static __inline __m512 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundepu32_ps(__m512i __A, const int __R) -{ - return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, - (__v16sf) - _mm512_setzero_ps (), - (__mmask16) -1, - __R); -} +#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ + (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \ + (__v16sf)_mm512_setzero_ps(), \ + (__mmask16)-1, (R)); }) static __inline __m512d __attribute__ (( __always_inline__, __nodebug__)) _mm512_cvtepi32_pd(__m256i __A) @@ -750,25 +733,16 @@ _mm512_cvtepu32_pd(__m256i __A) _mm512_setzero_pd (), (__mmask8) -1); } -static __inline __m256 __attribute__ (( __always_inline__, __nodebug__)) -_mm512_cvt_roundpd_ps(__m512d __A, const int __R) -{ - return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, - (__v8sf) - _mm256_setzero_ps (), - (__mmask8) -1, - __R); -} -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtps_ph(__m512 __A, const int __I) -{ - return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, - __I, - (__v16hi) - _mm256_setzero_si256 (), - -1); -} +#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ + (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvtps_ph(A, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \ + (__v16hi)_mm256_setzero_si256(), \ + -1); }) static __inline __m512 __attribute__ ((__always_inline__, __nodebug__)) _mm512_cvtph_ps(__m256i __A) @@ -798,61 +772,35 @@ _mm512_cvttpd_epi32(__m512d a) _MM_FROUND_CUR_DIRECTION); } -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtt_roundpd_epi32(__m512d __A, const int __R) -{ - return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); -} -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvtt_roundps_epi32(__m512 __A, const int __R) -{ - return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); -} +#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundps_epi32(__m512 __A, const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); -} -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundpd_epi32(__m512d __A, const int __R) -{ - return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); -} -static __inline __m512i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundps_epu32(__m512 __A, const int __R) -{ - return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, - (__v16si) - _mm512_setzero_si512 (), - (__mmask16) -1, - __R); -} -static __inline __m256i __attribute__ ((__always_inline__, __nodebug__)) -_mm512_cvt_roundpd_epu32(__m512d __A, const int __R) -{ - return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) -1, - __R); -} +#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8)-1, (R)); }) + +#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ + (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \ + (__v16si)_mm512_setzero_si512(), \ + (__mmask16)-1, (R)); }) + +#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ + (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \ + (__v8si)_mm256_setzero_si256(), \ + (__mmask8) -1, (R)); }) /* Unpack and Interleave */ static __inline __m512d __attribute__((__always_inline__, __nodebug__)) -- 2.40.0