From 26f4742738df55dc94df441f85c9e7c1e207f1ed Mon Sep 17 00:00:00 2001 From: Michael Zuckerman Date: Wed, 1 Jun 2016 14:41:41 +0000 Subject: [PATCH] [Clang][Intrinsics][avx512] Continue Adding round cvt to clang And remove trailing spaces in intrinsic f test Differential Revision: http://reviews.llvm.org/D20810 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271398 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Headers/avx512fintrin.h | 66 ++++++++++++++++++- test/CodeGen/avx512f-builtins.c | 108 +++++++++++++++++++++++++------- 2 files changed, 151 insertions(+), 23 deletions(-) diff --git a/lib/Headers/avx512fintrin.h b/lib/Headers/avx512fintrin.h index 4c2d5c848c..2f311f79f3 100644 --- a/lib/Headers/avx512fintrin.h +++ b/lib/Headers/avx512fintrin.h @@ -3585,6 +3585,27 @@ _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) /* Conversion */ +#define _mm512_cvtt_roundps_epu32( __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_undefined_epi32 (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvtt_roundps_epu32( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvtt_roundps_epu32( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttps2udq512_mask ((__v16sf)( __A),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512i __DEFAULT_FN_ATTRS _mm512_cvttps_epu32(__m512 __A) { @@ -3795,6 +3816,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) _MM_FROUND_CUR_DIRECTION); } +#define _mm512_cvt_roundps_ph( __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)_mm256_undefined_si256 (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_cvt_roundps_ph( __U, __W, __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)( __U),\ + (__mmask16)( __W));\ +}) + +#define _mm512_maskz_cvt_roundps_ph( __W, __A, __I) __extension__ ({ \ + (__m256i)__builtin_ia32_vcvtps2ph512_mask ((__v16sf)( __A),\ + (__I),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16)( __W));\ +}) + #define _mm512_cvtps_ph(A, I) __extension__ ({ \ (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ @@ -3809,7 +3852,28 @@ _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W)); }) - + +#define _mm512_cvt_roundph_ps( __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_cvt_roundph_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundph_ps( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_vcvtph2ps512_mask ((__v16hi)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + + static __inline __m512 __DEFAULT_FN_ATTRS _mm512_cvtph_ps(__m256i __A) { diff --git a/test/CodeGen/avx512f-builtins.c b/test/CodeGen/avx512f-builtins.c index 6e49d262f9..0acd3c81a4 100644 --- a/test/CodeGen/avx512f-builtins.c +++ b/test/CodeGen/avx512f-builtins.c @@ -2948,19 +2948,19 @@ __m512 test_mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B) { int test_mm_cvt_roundsd_si32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_si32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundsd_i32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_i32 // CHECK: @llvm.x86.avx512.vcvtsd2si32 - return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvt_roundsd_u32(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_u32 // CHECK: @llvm.x86.avx512.vcvtsd2usi32 - return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtsd_u32(__m128d __A) { @@ -2972,7 +2972,7 @@ unsigned test_mm_cvtsd_u32(__m128d __A) { unsigned long long test_mm_cvt_roundsd_u64(__m128d __A) { // CHECK-LABEL: @test_mm_cvt_roundsd_u64 // CHECK: @llvm.x86.avx512.vcvtsd2usi64 - return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtsd_u64(__m128d __A) { @@ -2984,31 +2984,31 @@ unsigned long long test_mm_cvtsd_u64(__m128d __A) { int test_mm_cvt_roundss_si32(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_si32 // CHECK: @llvm.x86.avx512.vcvtss2si32 - return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundss_i32(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_i32 // CHECK: @llvm.x86.avx512.vcvtss2si32 - return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvt_roundss_si64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_si64 // CHECK: @llvm.x86.avx512.vcvtss2si64 - return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvt_roundss_i64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_i64 // CHECK: @llvm.x86.avx512.vcvtss2si64 - return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvt_roundss_u32(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_u32 // CHECK: @llvm.x86.avx512.vcvtss2usi32 - return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvtss_u32(__m128 __A) { @@ -3020,7 +3020,7 @@ unsigned test_mm_cvtss_u32(__m128 __A) { unsigned long long test_mm_cvt_roundss_u64(__m128 __A) { // CHECK-LABEL: @test_mm_cvt_roundss_u64 // CHECK: @llvm.x86.avx512.vcvtss2usi64 - return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvtss_u64(__m128 __A) { @@ -3032,13 +3032,13 @@ unsigned long long test_mm_cvtss_u64(__m128 __A) { int test_mm_cvtt_roundsd_i32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_i32 // CHECK: @llvm.x86.avx512.cvttsd2si - return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundsd_si32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_si32 // CHECK: @llvm.x86.avx512.cvttsd2si - return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttsd_i32(__m128d __A) { @@ -3050,13 +3050,13 @@ int test_mm_cvttsd_i32(__m128d __A) { unsigned long long test_mm_cvtt_roundsd_si64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_si64 // CHECK: @llvm.x86.avx512.cvttsd2si64 - return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundsd_i64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_i64 // CHECK: @llvm.x86.avx512.cvttsd2si64 - return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttsd_i64(__m128d __A) { @@ -3068,7 +3068,7 @@ long long test_mm_cvttsd_i64(__m128d __A) { unsigned test_mm_cvtt_roundsd_u32(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_u32 // CHECK: @llvm.x86.avx512.cvttsd2usi - return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttsd_u32(__m128d __A) { @@ -3080,7 +3080,7 @@ unsigned test_mm_cvttsd_u32(__m128d __A) { unsigned long long test_mm_cvtt_roundsd_u64(__m128d __A) { // CHECK-LABEL: @test_mm_cvtt_roundsd_u64 // CHECK: @llvm.x86.avx512.cvttsd2usi64 - return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundsd_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttsd_u64(__m128d __A) { @@ -3092,13 +3092,13 @@ unsigned long long test_mm_cvttsd_u64(__m128d __A) { int test_mm_cvtt_roundss_i32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_i32 // CHECK: @llvm.x86.avx512.cvttss2si - return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_i32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvtt_roundss_si32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_si32 // CHECK: @llvm.x86.avx512.cvttss2si - return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_si32(__A, _MM_FROUND_CUR_DIRECTION); } int test_mm_cvttss_i32(__m128 __A) { @@ -3110,13 +3110,13 @@ int test_mm_cvttss_i32(__m128 __A) { float test_mm_cvtt_roundss_i64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_i64 // CHECK: @llvm.x86.avx512.cvttss2si64 - return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_i64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvtt_roundss_si64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_si64 // CHECK: @llvm.x86.avx512.cvttss2si64 - return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_si64(__A, _MM_FROUND_CUR_DIRECTION); } long long test_mm_cvttss_i64(__m128 __A) { @@ -3128,7 +3128,7 @@ long long test_mm_cvttss_i64(__m128 __A) { unsigned test_mm_cvtt_roundss_u32(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_u32 // CHECK: @llvm.x86.avx512.cvttss2usi - return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_u32(__A, _MM_FROUND_CUR_DIRECTION); } unsigned test_mm_cvttss_u32(__m128 __A) { @@ -3140,7 +3140,7 @@ unsigned test_mm_cvttss_u32(__m128 __A) { unsigned long long test_mm_cvtt_roundss_u64(__m128 __A) { // CHECK-LABEL: @test_mm_cvtt_roundss_u64 // CHECK: @llvm.x86.avx512.cvttss2usi64 - return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); + return _mm_cvtt_roundss_u64(__A, _MM_FROUND_CUR_DIRECTION); } unsigned long long test_mm_cvttss_u64(__m128 __A) { @@ -3149,6 +3149,70 @@ unsigned long long test_mm_cvttss_u64(__m128 __A) { return _mm_cvttss_u64(__A); } +__m512i test_mm512_cvtt_roundps_epu32(__m512 __A) +{ + // CHECK-LABEL: @test_mm512_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + return _mm512_cvtt_roundps_epu32(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_mask_cvtt_roundps_epu32(__m512i __W, __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + return _mm512_mask_cvtt_roundps_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512i test_mm512_maskz_cvtt_roundps_epu32( __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundps_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttps2udq.512 + + return _mm512_maskz_cvtt_roundps_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_cvt_roundps_ph(__m512 __A) +{ + // CHECK-LABEL: @test_mm512_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_cvt_roundps_ph(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_mask_cvt_roundps_ph(__m256i __W , __mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_maskz_cvt_roundps_ph(__mmask16 __U, __m512 __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.512 + return _mm512_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_cvt_roundph_ps(__m256i __A) +{ + // CHECK-LABEL: @test_mm512_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_cvt_roundph_ps(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_mask_cvt_roundph_ps(__m512 __W, __mmask16 __U, __m256i __A) +{ + // CHECK-LABEL: @test_mm512_mask_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_mask_cvt_roundph_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_cvt_roundph_ps(__mmask16 __U, __m256i __A) +{ + // CHECK-LABEL: @test_mm512_maskz_cvt_roundph_ps + // CHECK: @llvm.x86.avx512.mask.vcvtph2ps.512 + return _mm512_maskz_cvt_roundph_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps -- 2.40.0