From: Michael Zuckerman Date: Thu, 2 Jun 2016 07:44:08 +0000 (+0000) Subject: [Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=962e34b6131402252238777d1db178b1d5faa3a3;p=clang [Clang][AVX512][INTRINSICS] adding round cvt and fix regular cvtps_ph Differential Revision: http://reviews.llvm.org/D20870 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@271498 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Headers/avx512vlintrin.h b/lib/Headers/avx512vlintrin.h index 1d8f53e7ca..3846970322 100644 --- a/lib/Headers/avx512vlintrin.h +++ b/lib/Headers/avx512vlintrin.h @@ -9383,26 +9383,58 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) (__mmask8) __U); } -#define _mm_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) __U); +} + +#define _mm_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U)); }) -#define _mm_maskz_cvtps_ph(U, A, I) __extension__ ({ \ +#define _mm_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)); }) -#define _mm256_mask_cvtps_ph(W, U, A, I) __extension__ ({ \ +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) __W, + (__mmask8) __U); +} + +static __inline __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) +{ + return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, + (__v8hi) _mm_setzero_si128(), + (__mmask8) __U); +} +#define _mm256_mask_cvt_roundps_ph(W, U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ (__mmask8)(U)); }) -#define _mm256_maskz_cvtps_ph(U, A, I) __extension__ ({ \ +#define _mm256_maskz_cvt_roundps_ph(U, A, I) __extension__ ({ \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)); }) + #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_BOTH diff --git a/test/CodeGen/avx512vl-builtins.c b/test/CodeGen/avx512vl-builtins.c index f9db573d45..16b934588d 100644 --- a/test/CodeGen/avx512vl-builtins.c +++ b/test/CodeGen/avx512vl-builtins.c @@ -6726,24 +6726,47 @@ __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) { __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) { // CHECK-LABEL: @test_mm_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 - return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm_maskz_cvtps_ph(__U, __A); } __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_mask_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_mask_cvtps_ph(__W, __U, __A); } __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) { // CHECK-LABEL: @test_mm256_maskz_cvtps_ph // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 - return _mm256_maskz_cvtps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); + return _mm256_maskz_cvtps_ph(__U, __A); } +__m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + return _mm_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm_maskz_cvt_roundps_ph(__mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128 + return _mm_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm256_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + return _mm256_mask_cvt_roundps_ph(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m128i test_mm256_maskz_cvt_roundps_ph(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_cvt_roundps_ph + // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256 + return _mm256_maskz_cvt_roundps_ph(__U, __A, _MM_FROUND_CUR_DIRECTION); +}