]> granicus.if.org Git - clang/commitdiff
[X86] Make _mm_mask_cvtps_ph, _mm_maskz_cvtps_ph, _mm256_mask_cvtps_ph, and _mm256_ma...
authorCraig Topper <craig.topper@intel.com>
Thu, 20 Jun 2019 18:24:29 +0000 (18:24 +0000)
committerCraig Topper <craig.topper@intel.com>
Thu, 20 Jun 2019 18:24:29 +0000 (18:24 +0000)
These intrinsics should always take an immediate for the rounding mode.
The base instruction comes from before EVEX embdedded rounding. The
user should always provide the immediate rather than us assuming
CUR_DIRECTION.

Make the 512-bit versions also explicit aliases instead of copy
pasting the code.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@363961 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Headers/avx512fintrin.h
lib/Headers/avx512vlintrin.h
test/CodeGen/avx512vl-builtins.c

index 1bd410d383d2fbfcb14d49e1150e405ad03b4736..132761f9ef5c2c01ed4222eac115c5da93dfa1c4 100644 (file)
@@ -3779,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(W))
 
-#define _mm512_cvtps_ph(A, I) \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
-                                            (__v16hi)_mm256_setzero_si256(), \
-                                            (__mmask16)-1)
-
-#define _mm512_mask_cvtps_ph(U, W, A, I) \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
-                                            (__v16hi)(__m256i)(U), \
-                                            (__mmask16)(W))
-
-#define _mm512_maskz_cvtps_ph(W, A, I) \
-  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
-                                            (__v16hi)_mm256_setzero_si256(), \
-                                            (__mmask16)(W))
+#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
+#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
+#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
 
 #define _mm512_cvt_roundph_ps(A, R) \
   (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
index db02d41284c0902f3e92cf045b0db91cdc51878e..9494fc8a6e59541d7c3f41a9016db1200c22b4a3 100644 (file)
@@ -8411,22 +8411,6 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
                 (__mmask8) __U);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
-                                                  (__v8hi) __W,
-                                                  (__mmask8) __U);
-}
-
-static __inline __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
-                                                  (__v8hi) _mm_setzero_si128 (),
-                                                  (__mmask8) __U);
-}
-
 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
   (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
                                          (__v8hi)(__m128i)(W), \
@@ -8437,21 +8421,9 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U))
 
-static __inline __m128i __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
-                                                      (__v8hi) __W,
-                                                      (__mmask8) __U);
-}
+#define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
+#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
 
-static __inline __m128i __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
-{
-  return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
-                                                      (__v8hi) _mm_setzero_si128(),
-                                                      (__mmask8) __U);
-}
 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
   (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
                                             (__v8hi)(__m128i)(W), \
@@ -8462,6 +8434,9 @@ _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
                                             (__v8hi)_mm_setzero_si128(), \
                                             (__mmask8)(U))
 
+#define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
+#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
+
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
index 8c9e15d41096dfaa632c0ca11e8b09cb2d6f9e71..12d99264ee42856cd54a22da26d522cd49cc07ec 100644 (file)
@@ -9726,25 +9726,25 @@ __m256 test_mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A) {
 __m128i test_mm_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_mask_cvtps_ph
   // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
-  return _mm_mask_cvtps_ph(__W, __U, __A);
+  return _mm_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
 
 __m128i test_mm_maskz_cvtps_ph(__mmask8 __U, __m128 __A) {
   // CHECK-LABEL: @test_mm_maskz_cvtps_ph
   // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.128
-  return _mm_maskz_cvtps_ph(__U, __A);
+  return _mm_maskz_cvtps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
 
 __m128i test_mm256_mask_cvtps_ph(__m128i __W, __mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_mask_cvtps_ph
   // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
-  return _mm256_mask_cvtps_ph(__W, __U, __A);
+  return _mm256_mask_cvtps_ph(__W, __U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
 
 __m128i test_mm256_maskz_cvtps_ph(__mmask8 __U, __m256 __A) {
   // CHECK-LABEL: @test_mm256_maskz_cvtps_ph
   // CHECK: @llvm.x86.avx512.mask.vcvtps2ph.256
-  return _mm256_maskz_cvtps_ph(__U, __A);
+  return _mm256_maskz_cvtps_ph(__U, __A, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
 }
 
 __m128i test_mm_mask_cvt_roundps_ph(__m128i __W, __mmask8 __U, __m128 __A) {