From: Michael Zuckerman Date: Fri, 22 Apr 2016 10:06:10 +0000 (+0000) Subject: [Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14 ,rcp14, getexp and... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c124e72f325cd82a73be28ebbbbc49588ad591ce;p=clang [Clang][AVX512][BUILTIN] Adding scalar intrinsics for rsqrt14 ,rcp14, getexp and getmant instruction set Differential Revision: http://reviews.llvm.org/D19326 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@267129 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 3301afe0df..42c0b71441 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -1021,6 +1021,24 @@ _mm_rsqrt14_ss(__m128 __A, __m128 __B) (__mmask8) -1); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rsqrt14_sd(__m128d __A, __m128d __B) { @@ -1031,6 +1049,24 @@ _mm_rsqrt14_sd(__m128d __A, __m128d __B) (__mmask8) -1); } +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); +} + static __inline__ __m512d __DEFAULT_FN_ATTRS 
_mm512_rcp14_pd(__m512d __A) { @@ -1058,6 +1094,24 @@ _mm_rcp14_ss(__m128 __A, __m128 __B) (__mmask8) -1); } +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U); +} + static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_rcp14_sd(__m128d __A, __m128d __B) { @@ -1068,6 +1122,24 @@ _mm_rcp14_sd(__m128d __A, __m128d __B) (__mmask8) -1); } +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U); +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U); +} + static __inline __m512 __DEFAULT_FN_ATTRS _mm512_floor_ps(__m512 __A) { @@ -4008,6 +4080,42 @@ _mm_getexp_sd (__m128d __A, __m128d __B) (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } +static __inline__ __m128d __DEFAULT_FN_ATTRS +_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_getexp_round_sd( __W, __U, __A, __B, __R) __extension__ ({\ +__builtin_ia32_getexpsd128_round_mask ((__v2df) __A,\ + (__v2df) __B,\ + (__v2df) __W,\ + (__mmask8) __U,\ + __R);\ +}) + +static __inline__ __m128d __DEFAULT_FN_ATTRS 
+_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) +{ + return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, + (__v2df) __B, + (__v2df) _mm_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_getexp_round_sd( __U, __A, __B, __R) __extension__ ({\ +__builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,\ + (__v2df) __B,\ + (__v2df) _mm_setzero_pd (),\ + (__mmask8) __U,\ + __R);\ +}) + #define _mm_getexp_round_ss( __A, __B, __R) __extension__ ({ \ __builtin_ia32_getexpss128_round_mask ((__v4sf)( __A),\ (__v4sf)( __B), (__v4sf) _mm_setzero_ps(), (__mmask8) -1,\ @@ -4021,6 +4129,42 @@ _mm_getexp_ss (__m128 __A, __m128 __B) (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } +/* Single-precision scalar getexp: operates on __m128 (__v4sf), not __m128d. */ +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_mask_getexp_round_ss( __W, __U, __A, __B, __R) __extension__ ({\ +__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (__v4sf) __W,\ + (__mmask8) __U,\ + __R);\ +}) + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) +{ + return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) _mm_setzero_ps (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm_maskz_getexp_round_ss( __U, __A, __B, __R) __extension__ ({\ +__builtin_ia32_getexpss128_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (__v4sf) _mm_setzero_ps (),\ + (__mmask8) __U,\ + __R);\ +}) + #define _mm_getmant_round_sd( __A, __B, __C, __D, __R) __extension__ ({ \ __builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\ (__v2df)( __B),\ @@ -4035,6 +4179,42 @@ __builtin_ia32_getmantsd_round_mask ((__v2df)( __A),\ _MM_FROUND_CUR_DIRECTION);\ })
+#define _mm_mask_getmant_sd( __W, __U, __A, __B, __C, __D) __extension__ ({\
+__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\ + (__v2df) __B,\ + (( __D) << 2) |( __C),\ + (__v2df) __W,\ + (__mmask8) __U,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_mask_getmant_round_sd( __W, __U, __A, __B, __C, __D, __R) __extension__ ({\ +__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\ + (__v2df) __B,\ + (( __D) << 2) |( __C),\ + (__v2df) __W,\ + (__mmask8) __U,\ + __R);\ +}) + +#define _mm_maskz_getmant_sd( __U, __A, __B, __C, __D) __extension__ ({\ +__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\ + (__v2df) __B,\ + (( __D) << 2) |( __C),\ + (__v2df) _mm_setzero_pd (),\ + (__mmask8) __U,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_maskz_getmant_round_sd( __U, __A, __B, __C, __D, __R) __extension__ ({\ +__builtin_ia32_getmantsd_round_mask ( (__v2df) __A,\ + (__v2df) __B,\ + (( __D) << 2) |( __C),\ + (__v2df) _mm_setzero_pd (),\ + (__mmask8) __U,\ + __R);\ +}) + #define _mm_getmant_round_ss( __A, __B, __C, __D, __R) __extension__ ({ \ __builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\ (__v4sf)( __B),\ @@ -4049,6 +4229,41 @@ __builtin_ia32_getmantss_round_mask ((__v4sf)( __A),\ _MM_FROUND_CUR_DIRECTION);\ }) +#define _mm_mask_getmant_ss( __W, __U, __A, __B, __C, __D) __extension__ ({\ +__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (( __D) << 2) |( __C),\ + (__v4sf) __W,\ + (__mmask8) __U,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_mask_getmant_round_ss( __W, __U, __A, __B, __C, __D, __R) __extension__ ({\ +__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (( __D) << 2) |( __C),\ + (__v4sf) __W,\ + (__mmask8) __U,\ + __R);\ +}) + +#define _mm_maskz_getmant_ss( __U, __A, __B, __C, __D) __extension__ ({\ +__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (( __D) << 2) |( __C),\ + (__v4sf) _mm_setzero_ps (),\ + (__mmask8) __U,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_maskz_getmant_round_ss( __U, __A,
__B, __C, __D, __R) __extension__ ({\ +__builtin_ia32_getmantss_round_mask ((__v4sf) __A,\ + (__v4sf) __B,\ + (( __D) << 2) |( __C),\ + (__v4sf) _mm_setzero_ps (),\ + (__mmask8) __U,\ + __R);\ +}) static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov (__mmask16 __A) diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 844c92f3f0..7bac43d809 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -2556,8 +2556,6 @@ __m128d test_mm_getmant_sd(__m128d __A, __m128d __B) { return _mm_getmant_sd(__A, __B, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_src); } - - __m128 test_mm_getmant_round_ss(__m128 __A, __m128 __B) { // CHECK-LABEL: @test_mm_getmant_round_ss // CHECK: @llvm.x86.avx512.mask.getmant.ss @@ -4372,3 +4370,147 @@ __m512i test_mm512_mask_i32gather_epi64(__m512i __v1_old, __mmask8 __mask, __m25 return _mm512_mask_i32gather_epi64(__v1_old, __mask, __index, __addr, 2); } +__m128d test_mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_rsqrt14_sd + // CHECK: @llvm.x86.avx512.rsqrt14.sd + return _mm_mask_rsqrt14_sd(__W, __U, __A, __B); +} + +__m128d test_mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_rsqrt14_sd + // CHECK: @llvm.x86.avx512.rsqrt14.sd + return _mm_maskz_rsqrt14_sd(__U, __A, __B); +} + +__m128 test_mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_rsqrt14_ss + // CHECK: @llvm.x86.avx512.rsqrt14.ss + return _mm_mask_rsqrt14_ss(__W, __U, __A, __B); +} + +__m128 test_mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_rsqrt14_ss + // CHECK: @llvm.x86.avx512.rsqrt14.ss + return _mm_maskz_rsqrt14_ss(__U, __A, __B); +} + +__m128d test_mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_rcp14_sd + // CHECK: @llvm.x86.avx512.rcp14.sd + return _mm_mask_rcp14_sd(__W, 
__U, __A, __B); +} + +__m128d test_mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_rcp14_sd + // CHECK: @llvm.x86.avx512.rcp14.sd + return _mm_maskz_rcp14_sd(__U, __A, __B); +} + +__m128 test_mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_rcp14_ss + // CHECK: @llvm.x86.avx512.rcp14.ss + return _mm_mask_rcp14_ss(__W, __U, __A, __B); +} + +__m128 test_mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_rcp14_ss + // CHECK: @llvm.x86.avx512.rcp14.ss + return _mm_maskz_rcp14_ss(__U, __A, __B); +} + +__m128d test_mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_getexp_sd + // CHECK: @llvm.x86.avx512.mask.getexp.sd + return _mm_mask_getexp_sd(__W, __U, __A, __B); +} + +__m128d test_mm_mask_getexp_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_getexp_round_sd + // CHECK: @llvm.x86.avx512.mask.getexp.sd + return _mm_mask_getexp_round_sd(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_getexp_sd + // CHECK: @llvm.x86.avx512.mask.getexp.sd + return _mm_maskz_getexp_sd(__U, __A, __B); +} + +__m128d test_mm_maskz_getexp_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_getexp_round_sd + // CHECK: @llvm.x86.avx512.mask.getexp.sd + return _mm_maskz_getexp_round_sd(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_getexp_ss + // CHECK: @llvm.x86.avx512.mask.getexp.ss + return _mm_mask_getexp_ss(__W, __U, __A, __B); +} + +__m128 test_mm_mask_getexp_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_getexp_round_ss + // CHECK: @llvm.x86.avx512.mask.getexp.ss 
+ return _mm_mask_getexp_round_ss(__W, __U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_getexp_ss + // CHECK: @llvm.x86.avx512.mask.getexp.ss + return _mm_maskz_getexp_ss(__U, __A, __B); +} + +__m128 test_mm_maskz_getexp_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_getexp_round_ss + // CHECK: @llvm.x86.avx512.mask.getexp.ss + return _mm_maskz_getexp_round_ss(__U, __A, __B, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_mask_getmant_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_getmant_sd + // CHECK: @llvm.x86.avx512.mask.getmant.sd + return _mm_mask_getmant_sd(__W, __U, __A, __B, 1, 2); +} + +__m128d test_mm_mask_getmant_round_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_mask_getmant_round_sd + // CHECK: @llvm.x86.avx512.mask.getmant.sd + return _mm_mask_getmant_round_sd(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m128d test_mm_maskz_getmant_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_getmant_sd + // CHECK: @llvm.x86.avx512.mask.getmant.sd + return _mm_maskz_getmant_sd(__U, __A, __B, 1, 2); +} + +__m128d test_mm_maskz_getmant_round_sd(__mmask8 __U, __m128d __A, __m128d __B){ + // CHECK-LABEL: @test_mm_maskz_getmant_round_sd + // CHECK: @llvm.x86.avx512.mask.getmant.sd + return _mm_maskz_getmant_round_sd(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_mask_getmant_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_getmant_ss + // CHECK: @llvm.x86.avx512.mask.getmant.ss + return _mm_mask_getmant_ss(__W, __U, __A, __B, 1, 2); +} + +__m128 test_mm_mask_getmant_round_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_mask_getmant_round_ss + // CHECK: @llvm.x86.avx512.mask.getmant.ss + return 
_mm_mask_getmant_round_ss(__W, __U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m128 test_mm_maskz_getmant_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_getmant_ss + // CHECK: @llvm.x86.avx512.mask.getmant.ss + return _mm_maskz_getmant_ss(__U, __A, __B, 1, 2); +} + +__m128 test_mm_maskz_getmant_round_ss(__mmask8 __U, __m128 __A, __m128 __B){ + // CHECK-LABEL: @test_mm_maskz_getmant_round_ss + // CHECK: @llvm.x86.avx512.mask.getmant.ss + return _mm_maskz_getmant_round_ss(__U, __A, __B, 1, 2, _MM_FROUND_CUR_DIRECTION); +} +